{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random agent\n",
    "class RandomAgent(object):\n",
    "    \"\"\"Agent whose every decision is a fresh `action_space.sample()` call.\"\"\"\n",
    "\n",
    "    def __init__(self, action_space):\n",
    "        # Keep a handle on the space; `act` draws from it on each call.\n",
    "        self.action_space = action_space\n",
    "\n",
    "    def act(self, observation, reward, done):\n",
    "        \"\"\"Return one sample from the stored action space.\n",
    "\n",
    "        `observation`, `reward` and `done` are accepted but never read.\n",
    "        \"\"\"\n",
    "        chosen_action = self.action_space.sample()\n",
    "        return chosen_action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BiasedAgent(object):\n",
    "    \"\"\"Agent that samples one action at construction and repeats it on every call.\"\"\"\n",
    "\n",
    "    def __init__(self, action_space):\n",
    "        self.action_space = action_space\n",
    "        # Sample exactly once here; `act` replays this stored value afterwards.\n",
    "        fixed_action = self.action_space.sample()\n",
    "        self.action_always = fixed_action\n",
    "\n",
    "    def act(self, observation, reward, done):\n",
    "        \"\"\"Return the action stored by `__init__`.\n",
    "\n",
    "        `observation`, `reward` and `done` are accepted but never read.\n",
    "        \"\"\"\n",
    "        return self.action_always"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "g:\\Anaconda3\\envs\\torch\\lib\\site-packages\\gym\\logger.py:30: UserWarning: \u001b[33mWARN: Box bound precision lowered by casting to float32\u001b[0m\n",
      "  warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Discrete(4)\n",
      "episode 0-step 0, taking action 3, observation [ 0.00243788  1.4195321   0.12807022  0.17825912 -0.00432202 -0.05907182\n",
      "  0.          0.        ]\n",
      "episode 0-step 1, taking action 1, observation [ 0.00360622  1.4229496   0.11771735  0.15188724 -0.00519524 -0.01746539\n",
      "  0.          0.        ]\n",
      "episode 0-step 2, taking action 2, observation [ 0.00467758  1.4268925   0.1085073   0.17523889 -0.00655343 -0.02716633\n",
      "  0.          0.        ]\n",
      "episode 0-step 3, taking action 2, observation [ 0.00567169  1.4311221   0.10118948  0.18797058 -0.00829834 -0.03490174\n",
      "  0.          0.        ]\n",
      "episode 0-step 4, taking action 3, observation [ 0.00675774  1.4347509   0.11269271  0.16125652 -0.01234836 -0.081008\n",
      "  0.          0.        ]\n",
      "episode 0-step 5, taking action 3, observation [ 0.00790558  1.4377804   0.12044822  0.13458562 -0.0179493  -0.11202876\n",
      "  0.          0.        ]\n",
      "episode 0-step 6, taking action 1, observation [ 0.00898285  1.4402013   0.11158748  0.10754989 -0.0217722  -0.07646488\n",
      "  0.          0.        ]\n",
      "episode 0-step 7, taking action 2, observation [ 0.01015186  1.4432164   0.12040063  0.13394468 -0.02524228 -0.06940778\n",
      "  0.          0.        ]\n",
      "episode 0-step 8, taking action 2, observation [ 0.01146164  1.4461905   0.1338467   0.1321291  -0.02808844 -0.05692831\n",
      "  0.          0.        ]\n",
      "episode 0-step 9, taking action 3, observation [ 0.01283093  1.4485594   0.14132087  0.10519738 -0.03243335 -0.08690594\n",
      "  0.          0.        ]\n",
      "episode 0-step 10, taking action 2, observation [ 0.01439657  1.451458    0.16012785  0.12875514 -0.03596403 -0.07062013\n",
      "  0.          0.        ]\n",
      "episode 0-step 11, taking action 3, observation [ 0.01602411  1.4537536   0.16789347  0.10188747 -0.04105132 -0.10175478\n",
      "  0.          0.        ]\n",
      "episode 0-step 12, taking action 3, observation [ 0.01771526  1.4554516   0.1758564   0.07526364 -0.04772768 -0.13353929\n",
      "  0.          0.        ]\n",
      "episode 0-step 13, taking action 3, observation [ 0.01947641  1.456549    0.18463609  0.0484887  -0.05615821 -0.16862631\n",
      "  0.          0.        ]\n",
      "episode 0-step 14, taking action 1, observation [ 0.02117014  1.4570433   0.17618534  0.02169483 -0.06289394 -0.13472691\n",
      "  0.          0.        ]\n",
      "episode 0-step 15, taking action 1, observation [ 0.02278585  1.4569402   0.16638207 -0.00479198 -0.06766099 -0.09534954\n",
      "  0.          0.        ]\n",
      "episode 0-step 16, taking action 3, observation [ 0.02448406  1.456223    0.17673656 -0.03219338 -0.07451394 -0.13707131\n",
      "  0.          0.        ]\n",
      "episode 0-step 17, taking action 0, observation [ 0.02618246  1.4549067   0.17675649 -0.05886661 -0.08136455 -0.13702485\n",
      "  0.          0.        ]\n",
      "episode 0-step 18, taking action 3, observation [ 0.02794676  1.4529942   0.18497944 -0.08548049 -0.08985097 -0.1697439\n",
      "  0.          0.        ]\n",
      "episode 0-step 19, taking action 1, observation [ 0.02963781  1.4504849   0.17578802 -0.11195134 -0.09648977 -0.13278793\n",
      "  0.          0.        ]\n",
      "episode 0-step 20, taking action 3, observation [ 0.03141346  1.447366    0.18640015 -0.13920756 -0.10526244 -0.17546955\n",
      "  0.          0.        ]\n",
      "episode 0-step 21, taking action 1, observation [ 0.03310356  1.4436699   0.17561619 -0.16474159 -0.11183387 -0.13144025\n",
      "  0.          0.        ]\n",
      "episode 0-step 22, taking action 2, observation [ 0.03486118  1.4407753   0.18253958 -0.12917511 -0.11857864 -0.13490704\n",
      "  0.          0.        ]\n",
      "episode 0-step 23, taking action 3, observation [ 0.03671045  1.4372672   0.19402586 -0.15665695 -0.12763864 -0.18121614\n",
      "  0.          0.        ]\n",
      "episode 0-step 24, taking action 3, observation [ 0.03865042  1.4331273   0.20544139 -0.18502119 -0.13903248 -0.22787678\n",
      "  0.          0.        ]\n",
      "episode 0-step 25, taking action 3, observation [ 0.04068222  1.4283795   0.21691008 -0.21234313 -0.15273492 -0.2740484\n",
      "  0.          0.        ]\n",
      "episode 0-step 26, taking action 2, observation [ 0.04282369  1.4244841   0.22803576 -0.17461494 -0.16661647 -0.27763093\n",
      "  0.          0.        ]\n",
      "episode 0-step 27, taking action 0, observation [ 0.04496555  1.4199913   0.2280319  -0.20129754 -0.18049781 -0.27762726\n",
      "  0.          0.        ]\n",
      "episode 0-step 28, taking action 0, observation [ 0.0471076   1.4149009   0.2280277  -0.22798009 -0.19437899 -0.27762356\n",
      "  0.          0.        ]\n",
      "episode 0-step 29, taking action 0, observation [ 0.04925003  1.4092131   0.2280232  -0.2546626  -0.20825997 -0.2776199\n",
      "  0.          0.        ]\n",
      "episode 0-step 30, taking action 1, observation [ 0.05131292  1.4029571   0.21795666 -0.27972263 -0.22004248 -0.23565015\n",
      "  0.          0.        ]\n",
      "episode 0-step 31, taking action 1, observation [ 0.05330744  1.3961239   0.2093182  -0.3052105  -0.23003925 -0.19993535\n",
      "  0.          0.        ]\n",
      "episode 0-step 32, taking action 1, observation [ 0.05523491  1.388721    0.20082164 -0.33030108 -0.23824315 -0.16407765\n",
      "  0.          0.        ]\n",
      "episode 0-step 33, taking action 3, observation [ 0.05724201  1.3806858   0.21087079 -0.35880327 -0.2485663  -0.20646326\n",
      "  0.          0.        ]\n",
      "episode 0-step 34, taking action 0, observation [ 0.0592495   1.3720517   0.2108676  -0.3854786  -0.25888938 -0.20646179\n",
      "  0.          0.        ]\n",
      "episode 0-step 35, taking action 2, observation [ 0.06143351  1.3640268   0.22866204 -0.3585211  -0.26937672 -0.20974672\n",
      "  0.          0.        ]\n",
      "episode 0-step 36, taking action 3, observation [ 0.06370039  1.3553723   0.23902825 -0.3869632  -0.28204337 -0.25333288\n",
      "  0.          0.        ]\n",
      "episode 0-step 37, taking action 0, observation [ 0.06596756  1.34612     0.23902276 -0.4136428  -0.29470983 -0.25333008\n",
      "  0.          0.        ]\n",
      "episode 0-step 38, taking action 3, observation [ 0.06829691  1.3362396   0.2468082  -0.44200563 -0.30905405 -0.28688386\n",
      "  0.          0.        ]\n",
      "episode 0-step 39, taking action 1, observation [ 0.07054386  1.3257997   0.23635116 -0.46651876 -0.3211552  -0.24202275\n",
      "  0.          0.        ]\n",
      "episode 0-step 40, taking action 0, observation [ 0.0727912   1.3147618   0.23634548 -0.49319708 -0.33325624 -0.24202028\n",
      "  0.          0.        ]\n",
      "episode 0-step 41, taking action 1, observation [ 0.07496615  1.3031642   0.22713217 -0.51771456 -0.34334758 -0.20182662\n",
      "  0.          0.        ]\n",
      "episode 0-step 42, taking action 1, observation [ 0.07706823  1.2910142   0.21784678 -0.5418473  -0.35136512 -0.1603511\n",
      "  0.          0.        ]\n",
      "episode 0-step 43, taking action 0, observation [ 0.07917061  1.2782649   0.21784401 -0.568519   -0.35938266 -0.16035058\n",
      "  0.          0.        ]\n",
      "episode 0-step 44, taking action 1, observation [ 0.08120146  1.2649689   0.20870367 -0.59235287 -0.3653097  -0.1185408\n",
      "  0.          0.        ]\n",
      "episode 0-step 45, taking action 1, observation [ 0.08314896  1.2511234   0.1981245  -0.61621445 -0.36887914 -0.07138903\n",
      "  0.          0.        ]\n",
      "episode 0-step 46, taking action 0, observation [ 0.08509655  1.2366784   0.19812392 -0.64288205 -0.3724486  -0.07138897\n",
      "  0.          0.        ]\n",
      "episode 0-step 47, taking action 0, observation [ 0.08704414  1.2216332   0.19812334 -0.6695497  -0.37601805 -0.07138892\n",
      "  0.          0.        ]\n",
      "episode 0-step 48, taking action 0, observation [ 0.08899174  1.2059883   0.19812275 -0.69621736 -0.37958753 -0.07138889\n",
      "  0.          0.        ]\n",
      "episode 0-step 49, taking action 3, observation [ 0.09101572  1.1896998   0.20775338 -0.7253782  -0.38529748 -0.11419898\n",
      "  0.          0.        ]\n",
      "episode 0-step 50, taking action 3, observation [ 0.09311514  1.1727556   0.21737726 -0.75509864 -0.39322728 -0.15859596\n",
      "  0.          0.        ]\n",
      "episode 0-step 51, taking action 1, observation [ 0.09514485  1.1552632   0.20845461 -0.7789732  -0.39910197 -0.11749367\n",
      "  0.          0.        ]\n",
      "episode 0-step 52, taking action 1, observation [ 0.09710817  1.137226    0.19995126 -0.8026763  -0.402972   -0.07740069\n",
      "  0.          0.        ]\n",
      "episode 0-step 53, taking action 2, observation [ 0.09931917  1.1199397   0.22524047 -0.76945996 -0.40742233 -0.08900683\n",
      "  0.          0.        ]\n",
      "episode 0-step 54, taking action 2, observation [ 0.10162897  1.1029177   0.2357146  -0.7579013  -0.41250846 -0.10172238\n",
      "  0.          0.        ]\n",
      "episode 0-step 55, taking action 0, observation [ 0.10393896  1.085296    0.23571329 -0.78457    -0.41759455 -0.10172224\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 0-step 56, taking action 3, observation [ 0.10630055  1.0670298   0.24232809 -0.8136723  -0.424265   -0.13340895\n",
      "  0.          0.        ]\n",
      "episode 0-step 57, taking action 2, observation [ 0.10898685  1.048771    0.27423006 -0.81319505 -0.4303377  -0.12145398\n",
      "  0.          0.        ]\n",
      "episode 0-step 58, taking action 1, observation [ 0.11158209  1.0299625   0.26276135 -0.8369295  -0.4338612  -0.0704698\n",
      "  0.          0.        ]\n",
      "episode 0-step 59, taking action 0, observation [ 0.11417732  1.0105542   0.2627607  -0.86359715 -0.4373847  -0.07046971\n",
      "  0.          0.        ]\n",
      "episode 0-step 60, taking action 0, observation [ 0.11677255  0.9905459   0.2627601  -0.89026475 -0.4409082  -0.07046969\n",
      "  0.          0.        ]\n",
      "episode 0-step 61, taking action 1, observation [ 0.11930437  0.9699776   0.2547325  -0.91463673 -0.44260794 -0.03399497\n",
      "  0.          0.        ]\n",
      "episode 0-step 62, taking action 3, observation [ 0.12191429  0.9487481   0.26470083 -0.9447069  -0.44666252 -0.08109124\n",
      "  0.          0.        ]\n",
      "episode 0-step 63, taking action 3, observation [ 0.12457895  0.9268719   0.27171484 -0.9739654  -0.45240712 -0.11489198\n",
      "  0.          0.        ]\n",
      "episode 0-step 64, taking action 3, observation [ 0.12732229  0.9043376   0.28169128 -1.003929   -0.46049303 -0.16171892\n",
      "  0.          0.        ]\n",
      "episode 0-step 65, taking action 1, observation [ 0.1299942   0.8812665   0.27250454 -1.0271544  -0.4663526  -0.11719167\n",
      "  0.          0.        ]\n",
      "episode 0-step 66, taking action 3, observation [ 0.13274345  0.8575433   0.28224924 -1.0568504  -0.4744696  -0.16233984\n",
      "  0.          0.        ]\n",
      "episode 0-step 67, taking action 0, observation [ 0.13549289  0.8332209   0.2822455  -1.0835221  -0.4825865  -0.16233778\n",
      "  0.          0.        ]\n",
      "episode 0-step 68, taking action 3, observation [ 0.13830185  0.8082393   0.2898855  -1.1134658  -0.49263337 -0.20093818\n",
      "  0.          0.        ]\n",
      "episode 0-step 69, taking action 1, observation [ 0.14104757  0.78270864  0.28179902 -1.1373059  -0.5007481  -0.16229412\n",
      "  0.          0.        ]\n",
      "episode 0-step 70, taking action 0, observation [ 0.1437935   0.7565788   0.28179508 -1.1639774  -0.5088628  -0.1622934\n",
      "  0.          0.        ]\n",
      "episode 0-step 71, taking action 0, observation [ 0.14653978  0.7298497   0.28179103 -1.1906489  -0.5169774  -0.16229276\n",
      "  0.          0.        ]\n",
      "episode 0-step 72, taking action 1, observation [ 0.14921808  0.7025907   0.27299127 -1.2134796  -0.5228433  -0.11731689\n",
      "  0.          0.        ]\n",
      "episode 0-step 73, taking action 2, observation [ 0.1522313   0.67522573  0.3059442  -1.2180079  -0.52812815 -0.10569708\n",
      "  0.          0.        ]\n",
      "episode 0-step 74, taking action 0, observation [ 0.15524454  0.6472611   0.30594248 -1.2446766  -0.533413   -0.10569689\n",
      "  0.          0.        ]\n",
      "episode 0-step 75, taking action 1, observation [ 0.15817776  0.6187689   0.29570234 -1.2672641  -0.5361485  -0.05470898\n",
      "  0.          0.        ]\n",
      "episode 0-step 76, taking action 3, observation [ 0.16119203  0.589618    0.30591854 -1.2973847  -0.5413088  -0.10320686\n",
      "  0.          0.        ]\n",
      "episode 0-step 77, taking action 2, observation [ 0.16473007  0.5607427   0.3576831  -1.2849102  -0.545784   -0.08950369\n",
      "  0.          0.        ]\n",
      "episode 0-step 78, taking action 1, observation [ 0.16819935  0.5313213   0.34897548 -1.3084519  -0.54815614 -0.04744332\n",
      "  0.          0.        ]\n",
      "episode 0-step 79, taking action 3, observation [ 0.17173967  0.501225    0.35814366 -1.3392946  -0.55291766 -0.09523128\n",
      "  0.          0.        ]\n",
      "episode 0-step 80, taking action 3, observation [ 0.17536077  0.47044796  0.36849877 -1.370516   -0.56034774 -0.14860108\n",
      "  0.          0.        ]\n",
      "episode 0-step 81, taking action 3, observation [ 0.17904052  0.43901584  0.3759766  -1.4003564  -0.5696833  -0.18671189\n",
      "  0.          0.        ]\n",
      "episode 0-step 82, taking action 1, observation [ 0.18266296  0.4070371   0.36860773 -1.424013   -0.5771627  -0.14958918\n",
      "  0.          0.        ]\n",
      "episode 0-step 83, taking action 2, observation [ 0.18666773  0.37513542  0.4065227  -1.4204938  -0.5842989  -0.1427249\n",
      "  0.          0.        ]\n",
      "episode 0-step 84, taking action 2, observation [ 0.19107266  0.34356046  0.44653735 -1.4060134  -0.59146744 -0.14337035\n",
      "  0.          0.        ]\n",
      "episode 0-step 85, taking action 3, observation [ 0.19554825  0.31130964  0.45560116 -1.4370003  -0.6010527  -0.19170545\n",
      "  0.          0.        ]\n",
      "episode 0-step 86, taking action 0, observation [ 0.20002432  0.27845988  0.4555947  -1.4636731  -0.6106379  -0.19170456\n",
      "  0.          0.        ]\n",
      "episode 0-step 87, taking action 0, observation [ 0.20450076  0.24501115  0.4555881  -1.4903462  -0.6202231  -0.19170353\n",
      "  0.          0.        ]\n",
      "episode 0-step 88, taking action 1, observation [ 0.20893097  0.21102227  0.44948435 -1.5137161  -0.6280892  -0.15732172\n",
      "  0.          0.        ]\n",
      "episode 0-step 89, taking action 1, observation [ 0.21329126  0.17651834  0.44039345 -1.5356343  -0.63342774 -0.10677142\n",
      "  0.          0.        ]\n",
      "episode 0-step 90, taking action 2, observation [ 0.21841097  0.1425238   0.5156566  -1.5126861  -0.6379598  -0.0906409\n",
      "  0.          0.        ]\n",
      "episode 0-step 91, taking action 3, observation [ 0.22358522  0.10786301  0.52270937 -1.5431088  -0.64447147 -0.130234\n",
      "  0.          0.        ]\n",
      "episode 0-step 92, taking action 2, observation [ 0.22914381  0.07374903  0.56168824 -1.5191137  -0.6516863  -0.14429674\n",
      "  1.          0.        ]\n",
      "episode 0-step 93, taking action 3, observation [ 0.22885227  0.06860247  0.11367152 -0.34149775 -0.7595542  -2.0130944\n",
      "  1.          0.        ]\n",
      "episode 0-step 94, taking action 1, observation [ 0.2289218   0.06264783  0.06678481 -0.3057334  -0.8440698  -1.6899484\n",
      "  1.          0.        ]\n",
      "episode 0-step 95, taking action 3, observation [ 0.22848749  0.06103264 -0.0537184  -0.07488074 -0.82258433  0.29485264\n",
      "  1.          0.        ]\n",
      "episode 0-step 96, taking action 3, observation [ 0.22798434  0.05935579 -0.06833683 -0.06967767 -0.79143673  0.5198402\n",
      "  1.          0.        ]\n",
      "episode 0-step 97, taking action 3, observation [ 0.2274148   0.05753117 -0.0801581  -0.0706401  -0.75649804  0.63311315\n",
      "  1.          0.        ]\n",
      "episode 0-step 98, taking action 0, observation [ 0.22681455  0.05534743 -0.08871132 -0.08131003 -0.7169764   0.76708007\n",
      "  1.          0.        ]\n",
      "episode 0-step 99, taking action 3, observation [ 0.22623587  0.05269032 -0.08800697 -0.10123314 -0.67812645  0.7770189\n",
      "  1.          0.        ]\n",
      "episode 1-step 0, taking action 2, observation [ 0.00199356  1.4262948   0.09713344  0.34312025 -0.00277321 -0.03154219\n",
      "  0.          0.        ]\n",
      "episode 1-step 1, taking action 2, observation [ 0.0028862   1.4343944   0.09116653  0.35997602 -0.00465979 -0.03773532\n",
      "  0.          0.        ]\n",
      "episode 1-step 2, taking action 0, observation [ 0.00377884  1.4418936   0.09117217  0.33329955 -0.00654601 -0.03772774\n",
      "  0.          0.        ]\n",
      "episode 1-step 3, taking action 0, observation [ 0.00467148  1.4487929   0.09117773  0.30663428 -0.00843206 -0.03772464\n",
      "  0.          0.        ]\n",
      "episode 1-step 4, taking action 2, observation [ 0.00570192  1.4558015   0.1043059   0.31147555 -0.00967859 -0.02493307\n",
      "  0.          0.        ]\n",
      "episode 1-step 5, taking action 1, observation [ 0.00666971  1.4622082   0.09644774  0.28474873 -0.00934945  0.00658331\n",
      "  0.          0.        ]\n",
      "episode 1-step 6, taking action 0, observation [ 0.0076375   1.4680151   0.09644662  0.25808316 -0.0090208   0.00657341\n",
      "  0.          0.        ]\n",
      "episode 1-step 7, taking action 1, observation [ 0.00854025  1.4732164   0.08829091  0.23118177 -0.00705789  0.03926178\n",
      "  0.          0.        ]\n",
      "episode 1-step 8, taking action 1, observation [ 0.00936184  1.4778073   0.07811227  0.2040529  -0.00305801  0.08000488\n",
      "  0.          0.        ]\n",
      "episode 1-step 9, taking action 2, observation [1.0186100e-02 1.4832263e+00 7.8384228e-02 2.4084358e-01 9.4052899e-04\n",
      " 7.9978049e-02 0.0000000e+00 0.0000000e+00]\n",
      "episode 1-step 10, taking action 1, observation [0.01094885 1.4880503  0.07067784 0.2143916  0.00648123 0.11082406\n",
      " 0.         0.        ]\n",
      "episode 1-step 11, taking action 3, observation [0.01180029 1.4922761  0.08180137 0.18779211 0.00978748 0.06613081\n",
      " 0.         0.        ]\n",
      "episode 1-step 12, taking action 3, observation [0.01274595 1.4959052  0.09362037 0.16128318 0.0107234  0.01872\n",
      " 0.         0.        ]\n",
      "episode 1-step 13, taking action 1, observation [0.01361351 1.4989247  0.08383175 0.13417314 0.01362267 0.05799031\n",
      " 0.         0.        ]\n",
      "episode 1-step 14, taking action 3, observation [0.01456108 1.501353   0.09386244 0.10792015 0.01450622 0.01767255\n",
      " 0.         0.        ]\n",
      "episode 1-step 15, taking action 0, observation [0.01550865 1.5031813  0.09386051 0.08125134 0.01539089 0.01769501\n",
      " 0.         0.        ]\n",
      "episode 1-step 16, taking action 2, observation [0.01652164 1.5055138  0.10003798 0.10364863 0.01663502 0.02488543\n",
      " 0.         0.        ]\n",
      "episode 1-step 17, taking action 2, observation [0.01738977 1.5085511  0.0861721  0.13498451 0.01726901 0.0126808\n",
      " 0.         0.        ]\n",
      "episode 1-step 18, taking action 1, observation [0.01819553 1.5109923  0.07833965 0.10847326 0.01947134 0.04405067\n",
      " 0.         0.        ]\n",
      "episode 1-step 19, taking action 0, observation [0.0190012  1.5128337  0.07833279 0.08180905 0.0216724  0.0440255\n",
      " 0.         0.        ]\n",
      "episode 1-step 20, taking action 1, observation [0.0197217  1.5140655  0.06766123 0.05467991 0.02601343 0.08682843\n",
      " 0.         0.        ]\n",
      "episode 1-step 21, taking action 1, observation [0.02036638 1.5146872  0.0581562  0.02750942 0.03225875 0.12491743\n",
      " 0.         0.        ]\n",
      "episode 1-step 22, taking action 3, observation [2.1090794e-02 1.5147213e+00 6.8172529e-02 1.4201981e-03 3.6486711e-02\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 8.4566876e-02 0.0000000e+00 0.0000000e+00]\n",
      "episode 1-step 23, taking action 2, observation [0.02189035 1.51537    0.07519414 0.02870488 0.04120087 0.09429146\n",
      " 0.         0.        ]\n",
      "episode 1-step 24, taking action 1, observation [2.2596072e-02 1.5153937e+00 6.3400537e-02 8.3816692e-04 4.8286628e-02\n",
      " 1.4171535e-01 0.0000000e+00 0.0000000e+00]\n",
      "episode 1-step 25, taking action 1, observation [ 0.023209    1.5148207   0.05176073 -0.02579886  0.0577001   0.1882697\n",
      "  0.          0.        ]\n",
      "episode 1-step 26, taking action 2, observation [0.02367687 1.5151839  0.037666   0.01576176 0.06671727 0.18034367\n",
      " 0.         0.        ]\n",
      "episode 1-step 27, taking action 3, observation [ 0.02423878  1.5149481   0.04947038 -0.01079476  0.07337288  0.13311237\n",
      "  0.          0.        ]\n",
      "episode 1-step 28, taking action 3, observation [ 0.02486496  1.5141271   0.05754409 -0.0367432   0.07839598  0.10046178\n",
      "  0.          0.        ]\n",
      "episode 1-step 29, taking action 0, observation [ 0.02549114  1.5127065   0.05754434 -0.06341197  0.08341905  0.10046164\n",
      "  0.          0.        ]\n",
      "episode 1-step 30, taking action 0, observation [ 0.02611732  1.5106864   0.05754459 -0.09008073  0.08844212  0.10046151\n",
      "  0.          0.        ]\n",
      "episode 1-step 31, taking action 2, observation [ 0.02666254  1.508619    0.04967576 -0.09217345  0.09324288  0.09601509\n",
      "  0.          0.        ]\n",
      "episode 1-step 32, taking action 2, observation [ 0.02727337  1.5073307   0.05554481 -0.05761221  0.09874255  0.1099935\n",
      "  0.          0.        ]\n",
      "episode 1-step 33, taking action 1, observation [ 0.02778616  1.5054302   0.04324074 -0.08501572  0.10672171  0.15958306\n",
      "  0.          0.        ]\n",
      "episode 1-step 34, taking action 3, observation [ 0.02836237  1.5029461   0.0512265  -0.11087266  0.11307797  0.12712502\n",
      "  0.          0.        ]\n",
      "episode 1-step 35, taking action 2, observation [ 0.02870302  1.5008233   0.02842375 -0.09478749  0.11868585  0.11215748\n",
      "  0.          0.        ]\n",
      "episode 1-step 36, taking action 3, observation [ 0.0291214   1.4981254   0.03821304 -0.12019725  0.12229067  0.07209632\n",
      "  0.          0.        ]\n",
      "episode 1-step 37, taking action 1, observation [ 0.02947273  1.4948145   0.02978862 -0.14759932  0.12760389  0.10626475\n",
      "  0.          0.        ]\n",
      "episode 1-step 38, taking action 2, observation [ 0.02988815  1.4919331   0.03549625 -0.12860076  0.13361512  0.12022511\n",
      "  0.          0.        ]\n",
      "episode 1-step 39, taking action 0, observation [ 0.03030338  1.488452    0.03549683 -0.15527043  0.13962638  0.12022482\n",
      "  0.          0.        ]\n",
      "episode 1-step 40, taking action 3, observation [ 0.03081446  1.4843996   0.04756386 -0.18043266  0.14316526  0.07077756\n",
      "  0.          0.        ]\n",
      "episode 1-step 41, taking action 3, observation [ 0.03139334  1.4797618   0.05607725 -0.2062965   0.14497335  0.03616177\n",
      "  0.          0.        ]\n",
      "episode 1-step 42, taking action 0, observation [ 0.03197222  1.4745241   0.0560773  -0.23296346  0.14678146  0.03616176\n",
      "  0.          0.        ]\n",
      "episode 1-step 43, taking action 2, observation [ 0.03227253  1.4696966   0.02904249 -0.21465406  0.14777055  0.01978182\n",
      "  0.          0.        ]\n",
      "episode 1-step 44, taking action 3, observation [ 0.03264246  1.464275    0.03774361 -0.24089314  0.14701244 -0.01516237\n",
      "  0.          0.        ]\n",
      "episode 1-step 45, taking action 2, observation [ 0.03298931  1.4594648   0.03500379 -0.21375449  0.14669682 -0.00631277\n",
      "  0.          0.        ]\n",
      "episode 1-step 46, taking action 2, observation [ 0.03322373  1.4551905   0.02376455 -0.18993215  0.14637905 -0.00635544\n",
      "  0.          0.        ]\n",
      "episode 1-step 47, taking action 3, observation [ 0.03352146  1.450327    0.0316965  -0.21596523  0.1444549  -0.03848324\n",
      "  0.          0.        ]\n",
      "episode 1-step 48, taking action 3, observation [ 0.03391514  1.4448757   0.04373759 -0.24187137  0.14010318 -0.08703441\n",
      "  0.          0.        ]\n",
      "episode 1-step 49, taking action 3, observation [ 0.03439245  1.4388319   0.0542066  -0.26801723  0.13364804 -0.12910266\n",
      "  0.          0.        ]\n",
      "episode 1-step 50, taking action 1, observation [ 0.0347827   1.4321779   0.04329336 -0.29536235  0.12939301 -0.08510013\n",
      "  0.          0.        ]\n",
      "episode 1-step 51, taking action 3, observation [ 0.03523331  1.4249456   0.0509162  -0.32093382  0.12357065 -0.11644717\n",
      "  0.          0.        ]\n",
      "episode 1-step 52, taking action 2, observation [ 0.03546753  1.4179299   0.02996059 -0.31128517  0.11706495 -0.13011372\n",
      "  0.          0.        ]\n",
      "episode 1-step 53, taking action 3, observation [ 0.0357626   1.4103355   0.03763315 -0.33690724  0.10898702 -0.16155867\n",
      "  0.          0.        ]\n",
      "episode 1-step 54, taking action 0, observation [ 0.03605747  1.4021422   0.03763407 -0.36357933  0.10090912 -0.16155794\n",
      "  0.          0.        ]\n",
      "episode 1-step 55, taking action 2, observation [ 0.03640385  1.394556    0.04216259 -0.3366721   0.09344306 -0.1493213\n",
      "  0.          0.        ]\n",
      "episode 1-step 56, taking action 0, observation [ 0.03675013  1.3863707   0.04216327 -0.36334342  0.08597702 -0.14932075\n",
      "  0.          0.        ]\n",
      "episode 1-step 57, taking action 2, observation [ 0.03719454  1.3786474   0.05124269 -0.34287474  0.07924496 -0.13464141\n",
      "  0.          0.        ]\n",
      "episode 1-step 58, taking action 0, observation [ 0.03763895  1.370325    0.05124315 -0.36954525  0.0725129  -0.134641\n",
      "  0.          0.        ]\n",
      "episode 1-step 59, taking action 2, observation [ 0.03818426  1.3624629   0.06062896 -0.34914148  0.0664837  -0.12058427\n",
      "  0.          0.        ]\n",
      "episode 1-step 60, taking action 1, observation [ 0.03864565  1.3539956   0.0500939  -0.37616178  0.0625685  -0.07830402\n",
      "  0.          0.        ]\n",
      "episode 1-step 61, taking action 3, observation [ 0.0391985   1.3449447   0.06158385 -0.40200385  0.05633723 -0.12462529\n",
      "  0.          0.        ]\n",
      "episode 1-step 62, taking action 0, observation [ 0.03975134  1.3352945   0.06158414 -0.42867377  0.05010599 -0.12462499\n",
      "  0.          0.        ]\n",
      "episode 1-step 63, taking action 3, observation [ 0.04038448  1.3250391   0.07164918 -0.4555395   0.0418654  -0.16481158\n",
      "  0.          0.        ]\n",
      "episode 1-step 64, taking action 3, observation [ 0.04110365  1.3141824   0.08244938 -0.4822605   0.03146474 -0.2080135\n",
      "  0.          0.        ]\n",
      "episode 1-step 65, taking action 2, observation [ 0.04186363  1.3039315   0.08621818 -0.45541972  0.02136973 -0.20189996\n",
      "  0.          0.        ]\n",
      "episode 1-step 66, taking action 0, observation [ 0.04262362  1.2930818   0.08621851 -0.48209494  0.01127481 -0.20189853\n",
      "  0.          0.        ]\n",
      "episode 1-step 67, taking action 1, observation [ 0.04328728  1.2816273   0.07414697 -0.509052    0.00359854 -0.15352513\n",
      "  0.          0.        ]\n",
      "episode 1-step 68, taking action 2, observation [ 0.0438036   1.2701418   0.06008766 -0.5104724  -0.00475337 -0.16703829\n",
      "  0.          0.        ]\n",
      "episode 1-step 69, taking action 2, observation [ 0.04437504  1.259598    0.06536816 -0.46866834 -0.01287239 -0.16238028\n",
      "  0.          0.        ]\n",
      "episode 1-step 70, taking action 3, observation [ 0.04504471  1.2484609   0.07769159 -0.49510965 -0.023458   -0.21171227\n",
      "  0.          0.        ]\n",
      "episode 1-step 71, taking action 0, observation [ 0.04571448  1.2367252   0.07769134 -0.5217857  -0.03404355 -0.21171065\n",
      "  0.          0.        ]\n",
      "episode 1-step 72, taking action 2, observation [ 0.04652386  1.2250047   0.09106855 -0.5211845  -0.04404756 -0.20008036\n",
      "  0.          0.        ]\n",
      "episode 1-step 73, taking action 3, observation [ 0.04740124  1.2126887   0.0995831  -0.54777014 -0.05575361 -0.23412094\n",
      "  0.          0.        ]\n",
      "episode 1-step 74, taking action 3, observation [ 0.04836474  1.1997563   0.11037616 -0.575351   -0.06963691 -0.2776659\n",
      "  0.          0.        ]\n",
      "episode 1-step 75, taking action 3, observation [ 0.04939098  1.1862161   0.11825138 -0.60257393 -0.08510786 -0.30941904\n",
      "  0.          0.        ]\n",
      "episode 1-step 76, taking action 0, observation [ 0.05041742  1.1720796   0.11824906 -0.6292605  -0.10057855 -0.30941397\n",
      "  0.          0.        ]\n",
      "episode 1-step 77, taking action 3, observation [ 0.05154257  1.1573403   0.13058726 -0.6563872  -0.11852459 -0.35892087\n",
      "  0.          0.        ]\n",
      "episode 1-step 78, taking action 2, observation [ 0.05292864  1.1433592   0.15593061 -0.6228455  -0.13576692 -0.34484646\n",
      "  0.          0.        ]\n",
      "episode 1-step 79, taking action 0, observation [ 0.05431499  1.1287823   0.15592587 -0.6495368  -0.15300891 -0.3448394\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 1-step 80, taking action 3, observation [ 0.05576219  1.1135939   0.16350268 -0.6770796  -0.17180029 -0.37582782\n",
      "  0.          0.        ]\n",
      "episode 1-step 81, taking action 2, observation [ 0.05748367  1.0987434   0.19012754 -0.6622072  -0.18983483 -0.3606913\n",
      "  0.          0.        ]\n",
      "episode 1-step 82, taking action 0, observation [ 0.05920582  1.083297    0.19012019 -0.68890065 -0.20786901 -0.36068326\n",
      "  0.          0.        ]\n",
      "episode 1-step 83, taking action 0, observation [ 0.06092853  1.0672551   0.19011214 -0.715594   -0.22590278 -0.36067516\n",
      "  0.          0.        ]\n",
      "episode 1-step 84, taking action 3, observation [ 0.06274204  1.0505797   0.20146856 -0.74434465 -0.24632862 -0.40851682\n",
      "  0.          0.        ]\n",
      "episode 1-step 85, taking action 1, observation [ 0.06447516  1.0333471   0.19119206 -0.76900154 -0.2645719  -0.36486572\n",
      "  0.          0.        ]\n",
      "episode 1-step 86, taking action 0, observation [ 0.06620903  1.0155188   0.19118148 -0.7956951  -0.28281477 -0.36485735\n",
      "  0.          0.        ]\n",
      "episode 1-step 87, taking action 1, observation [ 0.06787471  0.997124    0.18248726 -0.820722   -0.29921168 -0.32793847\n",
      "  0.          0.        ]\n",
      "episode 1-step 88, taking action 1, observation [ 0.06944837  0.9781633   0.17085326 -0.84553653 -0.31317538 -0.27927423\n",
      "  0.          0.        ]\n",
      "episode 1-step 89, taking action 3, observation [ 0.07108803  0.9585657   0.17915335 -0.87436134 -0.3289713  -0.3159181\n",
      "  0.          0.        ]\n",
      "episode 1-step 90, taking action 1, observation [ 0.07265683  0.9384018   0.17015705 -0.8992585  -0.34284547 -0.2774843\n",
      "  0.          0.        ]\n",
      "episode 1-step 91, taking action 0, observation [ 0.07422628  0.9176402   0.17014906 -0.92594033 -0.3567195  -0.27748057\n",
      "  0.          0.        ]\n",
      "episode 1-step 92, taking action 2, observation [ 0.07592449  0.8970484   0.18321088 -0.9185857  -0.37086406 -0.28289136\n",
      "  0.          0.        ]\n",
      "episode 1-step 93, taking action 2, observation [ 0.07801523  0.8768073   0.22180386 -0.90296644 -0.38437825 -0.27028355\n",
      "  0.          0.        ]\n",
      "episode 1-step 94, taking action 1, observation [ 0.08003922  0.8559992   0.21334939 -0.92779845 -0.39606258 -0.23368664\n",
      "  0.          0.        ]\n",
      "episode 1-step 95, taking action 2, observation [ 0.0822176   0.8357051   0.22935005 -0.9052209  -0.4084132  -0.24701238\n",
      "  0.          0.        ]\n",
      "episode 1-step 96, taking action 3, observation [ 0.08446836  0.81476086  0.23849344 -0.9347927  -0.42287987 -0.28933358\n",
      "  0.          0.        ]\n",
      "episode 1-step 97, taking action 1, observation [ 0.08665371  0.79326755  0.23005788 -0.95876914 -0.43538806 -0.2501635\n",
      "  0.          0.        ]\n",
      "episode 1-step 98, taking action 1, observation [ 0.08875246  0.77124137  0.21897519 -0.9817895  -0.44530213 -0.19828138\n",
      "  0.          0.        ]\n",
      "episode 1-step 99, taking action 3, observation [ 0.09092569  0.74855864  0.22839458 -1.0116963  -0.45744568 -0.24287133\n",
      "  0.          0.        ]\n",
      "episode 2-step 0, taking action 1, observation [-0.00262709  1.4099989  -0.1389114  -0.03341962  0.00495462  0.06945905\n",
      "  0.          0.        ]\n",
      "episode 2-step 1, taking action 1, observation [-0.00405397  1.4086555  -0.1480574  -0.05973164  0.01025646  0.10604663\n",
      "  0.          0.        ]\n",
      "episode 2-step 2, taking action 1, observation [-0.00554075  1.4067159  -0.15555683 -0.08626957  0.01705718  0.13602711\n",
      "  0.          0.        ]\n",
      "episode 2-step 3, taking action 1, observation [-0.00709705  1.4041705  -0.1642713  -0.11325939  0.0256018   0.17090823\n",
      "  0.          0.        ]\n",
      "episode 2-step 4, taking action 3, observation [-0.0085659   1.4010186  -0.15329902 -0.14020273  0.03194285  0.12683281\n",
      "  0.          0.        ]\n",
      "episode 2-step 5, taking action 3, observation [-0.00994997  1.3972601  -0.14266361 -0.16714314  0.03615266  0.08420415\n",
      "  0.          0.        ]\n",
      "episode 2-step 6, taking action 2, observation [-0.01151695  1.394147   -0.16020714 -0.13844888  0.0396354   0.06966121\n",
      "  0.          0.        ]\n",
      "episode 2-step 7, taking action 0, observation [-0.01308393  1.3904338  -0.16021667 -0.16512856  0.04311777  0.06965366\n",
      "  0.          0.        ]\n",
      "episode 2-step 8, taking action 2, observation [-0.01467972  1.3871038  -0.16309004 -0.14811148  0.04659739  0.06959875\n",
      "  0.          0.        ]\n",
      "episode 2-step 9, taking action 2, observation [-0.01616001  1.3840103  -0.15221229 -0.13762325  0.05074743  0.08300786\n",
      "  0.          0.        ]\n",
      "episode 2-step 10, taking action 1, observation [-0.01772604  1.3803098  -0.16298029 -0.16469784  0.05705719  0.12620673\n",
      "  0.          0.        ]\n",
      "episode 2-step 11, taking action 2, observation [-0.01925268  1.3774202  -0.1594735  -0.12870236  0.06380593  0.13498744\n",
      "  0.          0.        ]\n",
      "episode 2-step 12, taking action 3, observation [-0.02068729  1.3739413  -0.14792207 -0.15481059  0.06822579  0.08840497\n",
      "  0.          0.        ]\n",
      "episode 2-step 13, taking action 1, observation [-0.02220306  1.3698572  -0.15809922 -0.18182823  0.07468759  0.12924765\n",
      "  0.          0.        ]\n",
      "episode 2-step 14, taking action 0, observation [-0.02371912  1.3651736  -0.15811816 -0.20850085  0.08114704  0.12920079\n",
      "  0.          0.        ]\n",
      "episode 2-step 15, taking action 0, observation [-0.02523546  1.3598905  -0.15813625 -0.23517352  0.08760592  0.12918922\n",
      "  0.          0.        ]\n",
      "episode 2-step 16, taking action 2, observation [-0.02696619  1.3555413  -0.17899457 -0.19365267  0.09348236  0.11753967\n",
      "  0.          0.        ]\n",
      "episode 2-step 17, taking action 3, observation [-0.02861385  1.350608   -0.1685423  -0.21950683  0.0972411   0.07518123\n",
      "  0.          0.        ]\n",
      "episode 2-step 18, taking action 3, observation [-0.0301939   1.3450725  -0.16010673 -0.24615364  0.09931502  0.04148252\n",
      "  0.          0.        ]\n",
      "episode 2-step 19, taking action 1, observation [-0.0318697   1.3389158  -0.1721076  -0.2739384   0.1038222   0.09015122\n",
      "  0.          0.        ]\n",
      "episode 2-step 20, taking action 3, observation [-0.03346844  1.3321697  -0.16245022 -0.30000904  0.10637691  0.0510987\n",
      "  0.          0.        ]\n",
      "episode 2-step 21, taking action 2, observation [-0.03500395  1.3259263  -0.15677382 -0.27771887  0.10958866  0.06424085\n",
      "  0.          0.        ]\n",
      "episode 2-step 22, taking action 2, observation [-0.03647232  1.3202131  -0.15076037 -0.2542159   0.1134915   0.07806354\n",
      "  0.          0.        ]\n",
      "episode 2-step 23, taking action 1, observation [-0.03803043  1.3138868  -0.16201472 -0.28164876  0.11966363  0.12345382\n",
      "  0.          0.        ]\n",
      "episode 2-step 24, taking action 1, observation [-0.03967895  1.306933   -0.17338684 -0.3097675   0.1281496   0.16971937\n",
      "  0.          0.        ]\n",
      "episode 2-step 25, taking action 1, observation [-0.04139271  1.2993695  -0.18156514 -0.337066    0.13829035  0.20281497\n",
      "  0.          0.        ]\n",
      "episode 2-step 26, taking action 1, observation [-0.04317627  1.2912041  -0.19026299 -0.3640518   0.15017265  0.23764619\n",
      "  0.          0.        ]\n",
      "episode 2-step 27, taking action 2, observation [-0.04516935  1.2834507  -0.21070102 -0.34579077  0.16156328  0.2278125\n",
      "  0.          0.        ]\n",
      "episode 2-step 28, taking action 1, observation [-0.04723244  1.2750711  -0.21952064 -0.37391672  0.17478575  0.26444975\n",
      "  0.          0.        ]\n",
      "episode 2-step 29, taking action 2, observation [-0.04925337  1.2670786  -0.21599281 -0.35691577  0.18872947  0.27887425\n",
      "  0.          0.        ]\n",
      "episode 2-step 30, taking action 2, observation [-0.05152483  1.2596736  -0.24055538 -0.330871    0.20222187  0.26984817\n",
      "  0.          0.        ]\n",
      "episode 2-step 31, taking action 1, observation [-0.05387392  1.2516394  -0.25029707 -0.35926238  0.21775606  0.31068367\n",
      "  0.          0.        ]\n",
      "episode 2-step 32, taking action 2, observation [-0.05650149  1.2439191  -0.2774761  -0.3453598   0.23263997  0.29767856\n",
      "  0.          0.        ]\n",
      "episode 2-step 33, taking action 2, observation [-0.05949688  1.2368003  -0.31339735 -0.3186482   0.24672298  0.28166062\n",
      "  0.          0.        ]\n",
      "episode 2-step 34, taking action 2, observation [-0.06278534  1.2301491  -0.34216195 -0.2978973   0.26029497  0.2714407\n",
      "  0.          0.        ]\n",
      "episode 2-step 35, taking action 2, observation [-0.06627198  1.224227   -0.36208576 -0.26565173  0.2740361   0.2748225\n",
      "  0.          0.        ]\n",
      "episode 2-step 36, taking action 0, observation [-0.06975899  1.2177073  -0.36207953 -0.29233366  0.28777707  0.27481896\n",
      "  0.          0.        ]\n",
      "episode 2-step 37, taking action 3, observation [-0.07317419  1.2106234  -0.3529445  -0.31713834  0.29956356  0.23572974\n",
      "  0.          0.        ]\n",
      "episode 2-step 38, taking action 1, observation [-0.07665253  1.2029151  -0.3608424  -0.34532648  0.31303048  0.26933855\n",
      "  0.          0.        ]\n",
      "episode 2-step 39, taking action 0, observation [-0.08013134  1.1946092  -0.36083555 -0.37200767  0.32649723  0.26933515\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 2-step 40, taking action 3, observation [-0.08352232  1.1857522  -0.34966254 -0.39606428  0.33752772  0.22060981\n",
      "  0.          0.        ]\n",
      "episode 2-step 41, taking action 0, observation [-0.08691359  1.1762967  -0.34965754 -0.42274058  0.34855813  0.22060792\n",
      "  0.          0.        ]\n",
      "episode 2-step 42, taking action 2, observation [-0.09063759  1.1675699  -0.38283023 -0.3904232   0.35954174  0.21967256\n",
      "  0.          0.        ]\n",
      "episode 2-step 43, taking action 0, observation [-0.09436188  1.1582446  -0.382825   -0.41709933  0.37052527  0.21967068\n",
      "  0.          0.        ]\n",
      "episode 2-step 44, taking action 0, observation [-0.09808664  1.1483208  -0.3828196  -0.44377545  0.3815087   0.21966882\n",
      "  0.          0.        ]\n",
      "episode 2-step 45, taking action 0, observation [-0.10181179  1.1377983  -0.38281402 -0.4704515   0.39249206  0.219667\n",
      "  0.          0.        ]\n",
      "episode 2-step 46, taking action 1, observation [-0.10561514  1.1266358  -0.39261523 -0.49956214  0.4056416   0.2629909\n",
      "  0.          0.        ]\n",
      "episode 2-step 47, taking action 2, observation [-0.10979185  1.1158398  -0.42949492 -0.48325726  0.41838074  0.25478262\n",
      "  0.          0.        ]\n",
      "episode 2-step 48, taking action 2, observation [-0.11444378  1.1055465  -0.47636667 -0.460847    0.43049258  0.24223635\n",
      "  0.          0.        ]\n",
      "episode 2-step 49, taking action 1, observation [-0.11915827  1.0946126  -0.4842189  -0.48992008  0.44441232  0.2783949\n",
      "  0.          0.        ]\n",
      "episode 2-step 50, taking action 2, observation [-0.12417908  1.0839695  -0.514691   -0.4770954   0.45823258  0.27640492\n",
      "  0.          0.        ]\n",
      "episode 2-step 51, taking action 0, observation [-0.12920074  1.0727285  -0.51468056 -0.5037765   0.47205263  0.27640122\n",
      "  0.          0.        ]\n",
      "episode 2-step 52, taking action 2, observation [-0.1344924   1.061735   -0.5416566  -0.4929234   0.48594838  0.2779153\n",
      "  0.          0.        ]\n",
      "episode 2-step 53, taking action 2, observation [-0.14010887  1.0513958  -0.57447    -0.46410882  0.5003221   0.28747386\n",
      "  0.          0.        ]\n",
      "episode 2-step 54, taking action 0, observation [-0.1457262   1.0404589  -0.57445776 -0.49079075  0.5146956   0.28746977\n",
      "  0.          0.        ]\n",
      "episode 2-step 55, taking action 3, observation [-0.15127134  1.0289918  -0.56510186 -0.5136961   0.52673775  0.24084273\n",
      "  0.          0.        ]\n",
      "episode 2-step 56, taking action 2, observation [-0.15708475  1.0173997  -0.59155184 -0.51921064  0.53844416  0.234128\n",
      "  0.          0.        ]\n",
      "episode 2-step 57, taking action 3, observation [-0.1628313   1.0052726  -0.58292544 -0.5422942   0.5479727   0.19057024\n",
      "  0.          0.        ]\n",
      "episode 2-step 58, taking action 3, observation [-0.16851416  0.9925929  -0.5748651  -0.56624854  0.5555897   0.15234014\n",
      "  0.          0.        ]\n",
      "episode 2-step 59, taking action 2, observation [-0.17454138  0.98040134 -0.60962117 -0.5447244   0.56362385  0.1606836\n",
      "  0.          0.        ]\n",
      "episode 2-step 60, taking action 2, observation [-0.18105325  0.96865386 -0.65789914 -0.52496225  0.57148296  0.15718275\n",
      "  0.          0.        ]\n",
      "episode 2-step 61, taking action 0, observation [-0.18756542  0.95630723 -0.65789497 -0.5516332   0.57934207  0.15718207\n",
      "  0.          0.        ]\n",
      "episode 2-step 62, taking action 1, observation [-0.1941534   0.94328    -0.66762793 -0.5828693   0.5897773   0.2087039\n",
      "  0.          0.        ]\n",
      "episode 2-step 63, taking action 1, observation [-0.20080695  0.92960256 -0.6757812  -0.61259764  0.6022118   0.24868996\n",
      "  0.          0.        ]\n",
      "episode 2-step 64, taking action 0, observation [-0.20746127  0.9153268  -0.67577034 -0.63927513  0.61464614  0.2486873\n",
      "  0.          0.        ]\n",
      "episode 2-step 65, taking action 1, observation [-0.2141829   0.90038794 -0.6842198  -0.66969997  0.62928075  0.29269168\n",
      "  0.          0.        ]\n",
      "episode 2-step 66, taking action 3, observation [-0.22085409  0.8849067  -0.67761153 -0.6932046   0.6421416   0.25721705\n",
      "  0.          0.        ]\n",
      "episode 2-step 67, taking action 3, observation [-0.22745948  0.86890453 -0.6690152  -0.7154745   0.6526284   0.2097375\n",
      "  0.          0.        ]\n",
      "episode 2-step 68, taking action 2, observation [-0.23477574  0.8533233  -0.7394746  -0.6965322   0.6624146   0.19572407\n",
      "  0.          0.        ]\n",
      "episode 2-step 69, taking action 2, observation [-0.24248561  0.8381985  -0.7792227  -0.67653394  0.67276883  0.20708492\n",
      "  0.          0.        ]\n",
      "episode 2-step 70, taking action 1, observation [-0.2502659   0.82240266 -0.7880671  -0.70743555  0.68550426  0.2547086\n",
      "  0.          0.        ]\n",
      "episode 2-step 71, taking action 0, observation [-0.258047    0.8060084  -0.78805435 -0.7341128   0.69823956  0.25470573\n",
      "  0.          0.        ]\n",
      "episode 2-step 72, taking action 2, observation [-0.2665205   0.7898881  -0.85662985 -0.7217047   0.710254    0.24028952\n",
      "  0.          0.        ]\n",
      "episode 2-step 73, taking action 3, observation [-0.27492946  0.77324766 -0.84826386 -0.743836    0.7198809   0.19253786\n",
      "  0.          0.        ]\n",
      "episode 2-step 74, taking action 3, observation [-0.28328213  0.75608927 -0.8408618  -0.7658852   0.7272429   0.14724079\n",
      "  0.          0.        ]\n",
      "episode 2-step 75, taking action 1, observation [-0.291686    0.73825294 -0.8475262  -0.7969961   0.7367179   0.18950012\n",
      "  0.          0.        ]\n",
      "episode 2-step 76, taking action 3, observation [-0.30002338  0.7199158  -0.8388011  -0.8180613   0.74347687  0.13517939\n",
      "  0.          0.        ]\n",
      "episode 2-step 77, taking action 0, observation [-0.3083611   0.7009791  -0.8387973  -0.84473073  0.7502358   0.13517892\n",
      "  0.          0.        ]\n",
      "episode 2-step 78, taking action 0, observation [-0.3166991   0.6814429  -0.83879346 -0.8714002   0.7569947   0.13517849\n",
      "  0.          0.        ]\n",
      "episode 2-step 79, taking action 0, observation [-0.3250373   0.66130716 -0.83878946 -0.8980696   0.76375365  0.13517797\n",
      "  0.          0.        ]\n",
      "episode 2-step 80, taking action 1, observation [-0.3334341   0.64049566 -0.8462404  -0.929171    0.7727481   0.17988959\n",
      "  0.          0.        ]\n",
      "episode 2-step 81, taking action 0, observation [-0.34183145  0.6190849  -0.8462335  -0.9558426   0.7817425   0.17988831\n",
      "  0.          0.        ]\n",
      "episode 2-step 82, taking action 0, observation [-0.35022932  0.59707505 -0.84622633 -0.9825141   0.79073685  0.17988738\n",
      "  0.          0.        ]\n",
      "episode 2-step 83, taking action 1, observation [-0.35867333  0.5743833  -0.8522789  -1.0138769   0.8018445   0.22215235\n",
      "  0.          0.        ]\n",
      "episode 2-step 84, taking action 0, observation [-0.36711818  0.5510928  -0.8522676  -1.0405508   0.812952    0.22215042\n",
      "  0.          0.        ]\n",
      "episode 2-step 85, taking action 2, observation [-0.37593168  0.5277503  -0.8890866  -1.0429467   0.8241144   0.2232482\n",
      "  0.          0.        ]\n",
      "episode 2-step 86, taking action 0, observation [-0.38474607  0.50380886 -0.88907516 -1.0696206   0.8352767   0.2232463\n",
      "  0.          0.        ]\n",
      "episode 2-step 87, taking action 3, observation [-0.3935008   0.4793531  -0.8813837  -1.0913124   0.84401256  0.17471674\n",
      "  0.          0.        ]\n",
      "episode 2-step 88, taking action 0, observation [-0.402256    0.454298   -0.88137656 -1.1179832   0.85274833  0.17471586\n",
      "  0.          0.        ]\n",
      "episode 2-step 89, taking action 3, observation [-0.41097036  0.42870706 -0.87607163 -1.140934    0.85972846  0.1396026\n",
      "  0.          0.        ]\n",
      "episode 2-step 90, taking action 2, observation [-0.42002887  0.40301564 -0.9105401  -1.1454843   0.8668366   0.14216295\n",
      "  0.          0.        ]\n",
      "episode 2-step 91, taking action 2, observation [-0.42949134  0.37703878 -0.95058566 -1.1579508   0.8734748   0.13276388\n",
      "  0.          0.        ]\n",
      "episode 2-step 92, taking action 0, observation [-0.43895406  0.35046238 -0.9505814  -1.1846197   0.88011295  0.13276362\n",
      "  0.          0.        ]\n",
      "episode 2-step 93, taking action 2, observation [-0.44881725  0.32344738 -0.99007666 -1.2037146   0.88595486  0.11683796\n",
      "  0.          0.        ]\n",
      "episode 2-step 94, taking action 2, observation [-0.45941225  0.29660362 -1.0630391  -1.1959611   0.8914975   0.11085264\n",
      "  0.          0.        ]\n",
      "episode 2-step 95, taking action 0, observation [-0.47000748  0.26916012 -1.063036   -1.2226293   0.8970401   0.11085236\n",
      "  0.          0.        ]\n",
      "episode 2-step 96, taking action 0, observation [-0.4806029   0.24111687 -1.063033   -1.2492976   0.90258276  0.11085217\n",
      "  0.          0.        ]\n",
      "episode 2-step 97, taking action 3, observation [-0.49114528  0.21256329 -1.0562077  -1.2707134   0.90572256  0.06279553\n",
      "  0.          0.        ]\n",
      "episode 2-step 98, taking action 3, observation [-0.50164133  0.18348278 -1.0503109  -1.2930698   0.9068534   0.022616\n",
      "  0.          0.        ]\n",
      "episode 2-step 99, taking action 0, observation [-0.51213735  0.15380223 -1.0503106  -1.3197366   0.90798414  0.02261529\n",
      "  0.          0.        ]\n",
      "episode 3-step 0, taking action 0, observation [ 0.00718451  1.4325604   0.3633413   0.46802628 -0.00822792 -0.08145195\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 3-step 1, taking action 1, observation [ 0.01068401  1.4424882   0.35170096  0.44122332 -0.00996163 -0.03467727\n",
      "  0.          0.        ]\n",
      "episode 3-step 2, taking action 2, observation [ 0.01402512  1.452697    0.3366673   0.45370784 -0.01248342 -0.05044046\n",
      "  0.          0.        ]\n",
      "episode 3-step 3, taking action 1, observation [ 0.01728563  1.4623146   0.32655028  0.42744273 -0.01297164 -0.00976531\n",
      "  0.          0.        ]\n",
      "episode 3-step 4, taking action 2, observation [ 2.0660687e-02  1.4726864e+00  3.3750302e-01  4.6096987e-01\n",
      " -1.2969289e-02  4.6883993e-05  0.0000000e+00  0.0000000e+00]\n",
      "episode 3-step 5, taking action 1, observation [ 0.02395115  1.4824547   0.32688576  0.43416232 -0.01083899  0.0426101\n",
      "  0.          0.        ]\n",
      "episode 3-step 6, taking action 1, observation [ 0.02715015  1.4916162   0.31543878  0.40720823 -0.00641739  0.08844034\n",
      "  0.          0.        ]\n",
      "episode 3-step 7, taking action 2, observation [ 3.0493926e-02  1.5009477e+00  3.2923040e-01  4.1474792e-01\n",
      " -1.3232732e-03  1.0189171e-01  0.0000000e+00  0.0000000e+00]\n",
      "episode 3-step 8, taking action 2, observation [0.03399401 1.5112057  0.3441039  0.45590052 0.00450664 0.11660919\n",
      " 0.         0.        ]\n",
      "episode 3-step 9, taking action 1, observation [0.03742399 1.5208625  0.33533445 0.42915398 0.01208986 0.15167831\n",
      " 0.         0.        ]\n",
      "episode 3-step 10, taking action 1, observation [0.04076815 1.5299147  0.32456023 0.40221456 0.02182778 0.19477585\n",
      " 0.         0.        ]\n",
      "episode 3-step 11, taking action 2, observation [0.04401655 1.539814   0.3153572  0.43980986 0.03121328 0.18772735\n",
      " 0.         0.        ]\n",
      "episode 3-step 12, taking action 2, observation [0.04720049 1.5498298  0.3091323  0.44493505 0.04036512 0.18305361\n",
      " 0.         0.        ]\n",
      "episode 3-step 13, taking action 3, observation [0.05044804 1.5592433  0.3171323  0.41814628 0.04791012 0.15091406\n",
      " 0.         0.        ]\n",
      "episode 3-step 14, taking action 2, observation [0.0538372  1.5689458  0.3304755  0.43093172 0.05625411 0.16689508\n",
      " 0.         0.        ]\n",
      "episode 3-step 15, taking action 2, observation [0.05725365 1.578606   0.33299863 0.42900184 0.06482433 0.17141995\n",
      " 0.         0.        ]\n",
      "episode 3-step 16, taking action 1, observation [0.06058359 1.587649   0.32214612 0.40140474 0.07558123 0.21515763\n",
      " 0.         0.        ]\n",
      "episode 3-step 17, taking action 0, observation [0.06391315 1.5960934  0.32211533 0.37472802 0.08633564 0.21510725\n",
      " 0.         0.        ]\n",
      "episode 3-step 18, taking action 3, observation [0.06734113 1.603944   0.3344678  0.34840834 0.09460554 0.16541305\n",
      " 0.         0.        ]\n",
      "episode 3-step 19, taking action 2, observation [0.07086935 1.6123233  0.34369153 0.37181723 0.10367393 0.18138476\n",
      " 0.         0.        ]\n",
      "episode 3-step 20, taking action 3, observation [0.07447815 1.6201041  0.35379076 0.34530416 0.11071596 0.1408532\n",
      " 0.         0.        ]\n",
      "episode 3-step 21, taking action 2, observation [0.07800341 1.6287899  0.34531862 0.38549474 0.11788955 0.14348459\n",
      " 0.         0.        ]\n",
      "episode 3-step 22, taking action 2, observation [0.08130904 1.6377726  0.32405192 0.39870578 0.12437683 0.12975734\n",
      " 0.         0.        ]\n",
      "episode 3-step 23, taking action 3, observation [0.08468475 1.6461791  0.3328935  0.37322238 0.12904723 0.09341585\n",
      " 0.         0.        ]\n",
      "episode 3-step 24, taking action 0, observation [0.08806229 1.6539645  0.33306268 0.34560806 0.13371728 0.09340118\n",
      " 0.         0.        ]\n",
      "episode 3-step 25, taking action 0, observation [0.09143982 1.6611502  0.33306298 0.31893957 0.13838734 0.09340101\n",
      " 0.         0.        ]\n",
      "episode 3-step 26, taking action 2, observation [0.09479628 1.6686177  0.3306877  0.33141285 0.14333673 0.0989878\n",
      " 0.         0.        ]\n",
      "episode 3-step 27, taking action 3, observation [0.09821539 1.67551    0.33858463 0.3060028  0.14665285 0.0663224\n",
      " 0.         0.        ]\n",
      "episode 3-step 28, taking action 0, observation [0.10163441 1.6818025  0.33858484 0.2793352  0.14996897 0.06632231\n",
      " 0.         0.        ]\n",
      "episode 3-step 29, taking action 1, observation [0.10497637 1.6874795  0.32890597 0.2517712  0.15525231 0.10566648\n",
      " 0.         0.        ]\n",
      "episode 3-step 30, taking action 2, observation [0.10810766 1.6933372  0.30843848 0.2598352  0.1599455  0.09386392\n",
      " 0.         0.        ]\n",
      "episode 3-step 31, taking action 3, observation [0.11130466 1.6986194  0.31671253 0.2344402  0.16292617 0.05961325\n",
      " 0.         0.        ]\n",
      "episode 3-step 32, taking action 2, observation [0.11455574 1.7044486  0.32130903 0.2586572  0.16672844 0.07604548\n",
      " 0.         0.        ]\n",
      "episode 3-step 33, taking action 1, observation [0.11773662 1.7096573  0.31246823 0.23085982 0.1723476  0.11238331\n",
      " 0.         0.        ]\n",
      "episode 3-step 34, taking action 0, observation [0.12091732 1.7142664  0.3124689  0.20419055 0.17796676 0.11238308\n",
      " 0.         0.        ]\n",
      "episode 3-step 35, taking action 2, observation [0.1241456  1.7189851  0.31661952 0.20896538 0.18418972 0.12445972\n",
      " 0.         0.        ]\n",
      "episode 3-step 36, taking action 1, observation [0.12730789 1.7230935  0.30837163 0.18160233 0.19208458 0.15789676\n",
      " 0.         0.        ]\n",
      "episode 3-step 37, taking action 2, observation [0.13021545 1.7274541  0.28356576 0.19284743 0.19932488 0.14480594\n",
      " 0.         0.        ]\n",
      "episode 3-step 38, taking action 3, observation [0.1332117  1.7312369  0.29470354 0.16745326 0.2042836  0.09917434\n",
      " 0.         0.        ]\n",
      "episode 3-step 39, taking action 1, observation [0.13612708 1.7343853  0.28449577 0.13894649 0.21138525 0.14203303\n",
      " 0.         0.        ]\n",
      "episode 3-step 40, taking action 3, observation [0.13910961 1.7369606  0.2929658  0.11368813 0.21671654 0.10662619\n",
      " 0.         0.        ]\n",
      "episode 3-step 41, taking action 3, observation [0.14217892 1.7389556  0.30386108 0.08821464 0.21982011 0.06207114\n",
      " 0.         0.        ]\n",
      "episode 3-step 42, taking action 1, observation [0.1451625  1.7403337  0.2931327  0.06045456 0.22511035 0.10580522\n",
      " 0.         0.        ]\n",
      "episode 3-step 43, taking action 1, observation [0.1480525  1.741075   0.28135607 0.03176343 0.23287036 0.15520012\n",
      " 0.         0.        ]\n",
      "episode 3-step 44, taking action 1, observation [0.15087728 1.7412047  0.2731966  0.00425728 0.24229321 0.18845685\n",
      " 0.         0.        ]\n",
      "episode 3-step 45, taking action 2, observation [0.15332966 1.7420197  0.23667271 0.03478785 0.2509948  0.17403163\n",
      " 0.         0.        ]\n",
      "episode 3-step 46, taking action 3, observation [0.15584135 1.7422478  0.24412826 0.0089189  0.25817525 0.14360906\n",
      " 0.         0.        ]\n",
      "episode 3-step 47, taking action 2, observation [0.15817185 1.7430317  0.22592337 0.03356396 0.26545572 0.14560954\n",
      " 0.         0.        ]\n",
      "episode 3-step 48, taking action 2, observation [0.16027737 1.7440096  0.20379734 0.0422209  0.27237493 0.13838488\n",
      " 0.         0.        ]\n",
      "episode 3-step 49, taking action 2, observation [0.16216412 1.7449648  0.18241262 0.04126857 0.27879837 0.12846947\n",
      " 0.         0.        ]\n",
      "episode 3-step 50, taking action 3, observation [0.16412506 1.7453613  0.19183187 0.01680231 0.28317922 0.08761688\n",
      " 0.         0.        ]\n",
      "episode 3-step 51, taking action 2, observation [0.1659934  1.7465972  0.18176685 0.0539364  0.2883895  0.10420605\n",
      " 0.         0.        ]\n",
      "episode 3-step 52, taking action 2, observation [0.16774702 1.7481027  0.17005904 0.065855   0.2938544  0.10929766\n",
      " 0.         0.        ]\n",
      "episode 3-step 53, taking action 0, observation [0.16950063 1.7490085  0.17006013 0.03918594 0.29931927 0.10929738\n",
      " 0.         0.        ]\n",
      "episode 3-step 54, taking action 0, observation [0.17125407 1.749315   0.1700612  0.01251689 0.30478412 0.10929718\n",
      " 0.         0.        ]\n",
      "episode 3-step 55, taking action 1, observation [ 0.17295179  1.748986    0.16298348 -0.01606179  0.31181806  0.14067826\n",
      "  0.          0.        ]\n",
      "episode 3-step 56, taking action 0, observation [ 0.17464933  1.7480577   0.16298535 -0.0427324   0.31885192  0.14067778\n",
      "  0.          0.        ]\n",
      "episode 3-step 57, taking action 3, observation [ 0.17641315  1.7465692   0.1713992  -0.06726535  0.3240327   0.10361604\n",
      "  0.          0.        ]\n",
      "episode 3-step 58, taking action 2, observation [ 0.17775097  1.7458467   0.12930702 -0.0331105   0.32868737  0.09309323\n",
      "  0.          0.        ]\n",
      "episode 3-step 59, taking action 1, observation [ 0.17902946  1.7444913   0.12181953 -0.06162823  0.33498704  0.1259936\n",
      "  0.          0.        ]\n",
      "episode 3-step 60, taking action 0, observation [ 0.18030795  1.7425363   0.12182114 -0.08829805  0.3412867   0.12599328\n",
      "  0.          0.        ]\n",
      "episode 3-step 61, taking action 3, observation [ 0.18165216  1.7400169   0.13015905 -0.11299524  0.3457637   0.08954015\n",
      "  0.          0.        ]\n",
      "episode 3-step 62, taking action 2, observation [ 0.18274489  1.73815     0.10479502 -0.08405872  0.3504551   0.09382787\n",
      "  0.          0.        ]\n",
      "episode 3-step 63, taking action 3, observation [ 0.18391275  1.7357311   0.11436041 -0.10809468  0.35300583  0.05101443\n",
      "  0.          0.        ]\n",
      "episode 3-step 64, taking action 1, observation [ 0.18501052  1.7326764   0.1055284  -0.13681157  0.35748395  0.08956264\n",
      "  0.          0.        ]\n",
      "episode 3-step 65, taking action 0, observation [ 0.1861083   1.7290221   0.10552927 -0.16347982  0.36196205  0.08956252\n",
      "  0.          0.        ]\n",
      "episode 3-step 66, taking action 1, observation [ 0.18712997  1.7247357   0.09598462 -0.19207387  0.3684853   0.13046524\n",
      "  0.          0.        ]\n",
      "episode 3-step 67, taking action 2, observation [ 0.18776074  1.721019    0.05728917 -0.16669604  0.37462193  0.12273256\n",
      "  0.          0.        ]\n",
      "episode 3-step 68, taking action 2, observation [ 0.18797025  1.7178514   0.01566096 -0.1421825   0.3802423   0.11240701\n",
      "  0.          0.        ]\n",
      "episode 3-step 69, taking action 2, observation [ 0.18788032  1.7147478  -0.01377886 -0.13922273  0.3853367   0.1018879\n",
      "  0.          0.        ]\n",
      "episode 3-step 70, taking action 0, observation [ 0.1877903   1.7110447  -0.01377764 -0.16589142  0.39043108  0.10188769\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 3-step 71, taking action 1, observation [ 0.18762378  1.7066947  -0.02345245 -0.19520909  0.3976983   0.14534459\n",
      "  0.          0.        ]\n",
      "episode 3-step 72, taking action 3, observation [ 0.18752404  1.7017949  -0.01491392 -0.21916871  0.40299255  0.10588486\n",
      "  0.          0.        ]\n",
      "episode 3-step 73, taking action 0, observation [ 0.18742418  1.6962953  -0.01491255 -0.24583751  0.40828678  0.10588463\n",
      "  0.          0.        ]\n",
      "episode 3-step 74, taking action 3, observation [ 0.18738957  1.6902304  -0.00669061 -0.27048665  0.4117673   0.06961019\n",
      "  0.          0.        ]\n",
      "episode 3-step 75, taking action 3, observation [ 0.18742609  1.6836141   0.00234146 -0.2944372   0.41317937  0.02824183\n",
      "  0.          0.        ]\n",
      "episode 3-step 76, taking action 0, observation [ 0.18746261  1.6763979   0.00234156 -0.32110402  0.41459143  0.02824179\n",
      "  0.          0.        ]\n",
      "episode 3-step 77, taking action 2, observation [ 0.18713483  1.6697643  -0.03403148 -0.29520166  0.41592994  0.02677041\n",
      "  0.          0.        ]\n",
      "episode 3-step 78, taking action 1, observation [ 0.18672295  1.6624706  -0.04470706 -0.32520473  0.41973355  0.07607219\n",
      "  0.          0.        ]\n",
      "episode 3-step 79, taking action 0, observation [ 0.18631105  1.654577   -0.04470632 -0.35187247  0.42353716  0.07607209\n",
      "  0.          0.        ]\n",
      "episode 3-step 80, taking action 2, observation [ 0.18534175  1.6473079  -0.09969963 -0.3239153   0.4265426   0.06010832\n",
      "  0.          0.        ]\n",
      "episode 3-step 81, taking action 2, observation [ 0.18418026  1.6404114  -0.11932337 -0.30746832  0.42998     0.06874809\n",
      "  0.          0.        ]\n",
      "episode 3-step 82, taking action 3, observation [ 0.18310909  1.6329697  -0.10790639 -0.33098805  0.4308437   0.01727397\n",
      "  0.          0.        ]\n",
      "episode 3-step 83, taking action 0, observation [ 0.18203792  1.624928   -0.10790636 -0.35765475  0.4317074   0.01727414\n",
      "  0.          0.        ]\n",
      "episode 3-step 84, taking action 0, observation [ 0.18096685  1.6162862  -0.10790634 -0.3843215   0.4325711   0.01727415\n",
      "  0.          0.        ]\n",
      "episode 3-step 85, taking action 3, observation [ 0.17997865  1.6071131  -0.09728793 -0.40721926  0.43090695 -0.0332835\n",
      "  0.          0.        ]\n",
      "episode 3-step 86, taking action 0, observation [ 0.17899045  1.5973403  -0.09728779 -0.43388617  0.42924276 -0.03328348\n",
      "  0.          0.        ]\n",
      "episode 3-step 87, taking action 3, observation [ 0.17806149  1.5870172  -0.08969624 -0.4578318   0.42576802 -0.06949415\n",
      "  0.          0.        ]\n",
      "episode 3-step 88, taking action 3, observation [ 0.17719822  1.576126   -0.08145001 -0.4825837   0.4204909  -0.10554256\n",
      "  0.          0.        ]\n",
      "episode 3-step 89, taking action 1, observation [ 0.17627755  1.564595   -0.08874395 -0.511499    0.41689402 -0.07193737\n",
      "  0.          0.        ]\n",
      "episode 3-step 90, taking action 2, observation [ 0.17511253  1.553571   -0.11349355 -0.4890781   0.41364643 -0.06495149\n",
      "  0.          0.        ]\n",
      "episode 3-step 91, taking action 1, observation [ 0.17387657  1.5419066  -0.12242103 -0.51807237  0.4123892  -0.02514454\n",
      "  0.          0.        ]\n",
      "episode 3-step 92, taking action 1, observation [ 0.17257948  1.5295904  -0.13028292 -0.54755193  0.41300073  0.0122308\n",
      "  0.          0.        ]\n",
      "episode 3-step 93, taking action 3, observation [ 0.17136201  1.5167263  -0.12018305 -0.5712872   0.41131523 -0.03371027\n",
      "  0.          0.        ]\n",
      "episode 3-step 94, taking action 2, observation [ 0.17000847  1.504225   -0.13435307 -0.5553188   0.41024083 -0.02148794\n",
      "  0.          0.        ]\n",
      "episode 3-step 95, taking action 3, observation [ 0.16873398  1.4911779  -0.12430717 -0.57897085  0.40686724 -0.06747234\n",
      "  0.          0.        ]\n",
      "episode 3-step 96, taking action 2, observation [ 0.16709451  1.4785707  -0.16054615 -0.5593497   0.40321246 -0.07309527\n",
      "  0.          0.        ]\n",
      "episode 3-step 97, taking action 3, observation [ 0.16553402  1.46541    -0.15055758 -0.5833692   0.3973254  -0.11774087\n",
      "  0.          0.        ]\n",
      "episode 3-step 98, taking action 2, observation [ 0.16380481  1.4523995  -0.16758098 -0.57676536  0.39161414 -0.11422502\n",
      "  0.          0.        ]\n",
      "episode 3-step 99, taking action 3, observation [ 0.16214085  1.4388212  -0.15936552 -0.6015583   0.38411215 -0.1500397\n",
      "  0.          0.        ]\n",
      "episode 4-step 0, taking action 0, observation [-0.007866    1.4217054  -0.39782962  0.22681114  0.00902296  0.08918929\n",
      "  0.          0.        ]\n",
      "episode 4-step 1, taking action 0, observation [-0.01179934  1.42621    -0.39784366  0.20017438  0.01347931  0.08913518\n",
      "  0.          0.        ]\n",
      "episode 4-step 2, taking action 0, observation [-0.01573286  1.4301151  -0.39785677  0.1735021   0.01793511  0.08912405\n",
      "  0.          0.        ]\n",
      "episode 4-step 3, taking action 3, observation [-0.01959343  1.4334316  -0.3887103   0.14736697  0.02055005  0.0523038\n",
      "  0.          0.        ]\n",
      "episode 4-step 4, taking action 2, observation [-0.02343864  1.437166   -0.3873101   0.16592978  0.02330581  0.05512058\n",
      "  0.          0.        ]\n",
      "episode 4-step 5, taking action 0, observation [-0.02728396  1.4403002  -0.3873183   0.13925257  0.02606068  0.05510233\n",
      "  0.          0.        ]\n",
      "episode 4-step 6, taking action 2, observation [-0.03106804  1.4439156  -0.3815715   0.16063334  0.02919685  0.06272914\n",
      "  0.          0.        ]\n",
      "episode 4-step 7, taking action 1, observation [-0.03492365  1.4469286  -0.390549    0.13379812  0.03413124  0.09869685\n",
      "  0.          0.        ]\n",
      "episode 4-step 8, taking action 2, observation [-0.03874054  1.450806   -0.38702065  0.17220251  0.03940661  0.10551709\n",
      "  0.          0.        ]\n",
      "episode 4-step 9, taking action 1, observation [-0.04261952  1.4540844  -0.39480278  0.14551274  0.04623725  0.13662525\n",
      "  0.          0.        ]\n",
      "episode 4-step 10, taking action 3, observation [-0.04643917  1.4567652  -0.3873457   0.11896583  0.05156697  0.10660436\n",
      "  0.          0.        ]\n",
      "episode 4-step 11, taking action 2, observation [-0.05044146  1.4594363  -0.4048415   0.1185477   0.05613082  0.09128533\n",
      "  0.          0.        ]\n",
      "episode 4-step 12, taking action 2, observation [-0.0544878   1.4628563  -0.40926632  0.1518205   0.06072555  0.09190275\n",
      "  0.          0.        ]\n",
      "episode 4-step 13, taking action 0, observation [-0.05853424  1.4656763  -0.40927872  0.1251415   0.06531925  0.09188219\n",
      "  0.          0.        ]\n",
      "episode 4-step 14, taking action 3, observation [-0.06248455  1.4679065  -0.39722893  0.09901763  0.06748646  0.04334837\n",
      "  0.          0.        ]\n",
      "episode 4-step 15, taking action 2, observation [-0.06664886  1.4708374  -0.41788036  0.13019496  0.0689268   0.02880941\n",
      "  0.          0.        ]\n",
      "episode 4-step 16, taking action 3, observation [-0.07072926  1.4731834  -0.40734428  0.10430322  0.06823978 -0.01374146\n",
      "  0.          0.        ]\n",
      "episode 4-step 17, taking action 3, observation [-0.07475052  1.4749447  -0.3999102   0.07838289  0.06604859 -0.04382769\n",
      "  0.          0.        ]\n",
      "episode 4-step 18, taking action 3, observation [-0.0787117   1.4761013  -0.39239448  0.05156048  0.06236062 -0.07376638\n",
      "  0.          0.        ]\n",
      "episode 4-step 19, taking action 2, observation [-0.08261366  1.4778564  -0.38699004  0.07813003  0.05918027 -0.0636126\n",
      "  0.          0.        ]\n",
      "episode 4-step 20, taking action 0, observation [-0.08651552  1.4790113  -0.3869802   0.05145145  0.05599967 -0.06361754\n",
      "  0.          0.        ]\n",
      "episode 4-step 21, taking action 0, observation [-0.09041739  1.4795665  -0.3869711   0.02478671  0.05281975 -0.06360412\n",
      "  0.          0.        ]\n",
      "episode 4-step 22, taking action 1, observation [-0.09441595  1.4795074  -0.39912352 -0.00259194  0.05208732 -0.01464971\n",
      "  0.          0.        ]\n",
      "episode 4-step 23, taking action 0, observation [-0.09841461  1.4788486  -0.39912194 -0.02926082  0.05135402 -0.01466753\n",
      "  0.          0.        ]\n",
      "episode 4-step 24, taking action 0, observation [-0.10241251  1.4775681  -0.39904425 -0.0568822   0.05062135 -0.01465359\n",
      "  0.          0.        ]\n",
      "episode 4-step 25, taking action 3, observation [-0.10631838  1.4756885  -0.3875098  -0.08344124  0.04757963 -0.06083397\n",
      "  0.          0.        ]\n",
      "episode 4-step 26, taking action 2, observation [-0.11014137  1.4743795  -0.3797865  -0.05809795  0.04509196 -0.04975352\n",
      "  0.          0.        ]\n",
      "episode 4-step 27, taking action 2, observation [-0.11417589  1.4736352  -0.400112   -0.03297586  0.04178818 -0.06607555\n",
      "  0.          0.        ]\n",
      "episode 4-step 28, taking action 1, observation [-0.11829748  1.472295   -0.4110288  -0.05954138  0.04066711 -0.02242124\n",
      "  0.          0.        ]\n",
      "episode 4-step 29, taking action 2, observation [-0.12233305  1.4709432  -0.4029026  -0.06006491  0.04001727 -0.01299667\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 4-step 30, taking action 0, observation [-0.12636861  1.4689913  -0.40290266 -0.08673161  0.03936743 -0.01299668\n",
      "  0.          0.        ]\n",
      "episode 4-step 31, taking action 3, observation [-0.13032503  1.4664428  -0.39296466 -0.11319604  0.03672589 -0.05283055\n",
      "  0.          0.        ]\n",
      "episode 4-step 32, taking action 3, observation [-0.13419695  1.4633005  -0.38236386 -0.13954894  0.03195847 -0.09534859\n",
      "  0.          0.        ]\n",
      "episode 4-step 33, taking action 3, observation [-0.13799867  1.4595567  -0.37356812 -0.16626202  0.02543102 -0.13054875\n",
      "  0.          0.        ]\n",
      "episode 4-step 34, taking action 0, observation [-0.1418004   1.4552135  -0.3735679  -0.19293228  0.01890361 -0.13054837\n",
      "  0.          0.        ]\n",
      "episode 4-step 35, taking action 2, observation [-0.14573984  1.4512606  -0.38674238 -0.1756077   0.01179697 -0.14213261\n",
      "  0.          0.        ]\n",
      "episode 4-step 36, taking action 2, observation [-0.14978418  1.4475577  -0.39677277 -0.164537    0.00423561 -0.15122753\n",
      "  0.          0.        ]\n",
      "episode 4-step 37, taking action 3, observation [-0.1537487   1.4432545  -0.38674918 -0.19125368 -0.00533302 -0.19137262\n",
      "  0.          0.        ]\n",
      "episode 4-step 38, taking action 3, observation [-0.15765181  1.4383571  -0.3790543  -0.21774004 -0.0164422  -0.22218373\n",
      "  0.          0.        ]\n",
      "episode 4-step 39, taking action 0, observation [-0.16155481  1.4328616  -0.3790545  -0.24441703 -0.02755127 -0.22218187\n",
      "  0.          0.        ]\n",
      "episode 4-step 40, taking action 1, observation [-0.16555671  1.4267697  -0.39144757 -0.27093083 -0.03617768 -0.17252788\n",
      "  0.          0.        ]\n",
      "episode 4-step 41, taking action 2, observation [-0.16955718  1.4215306  -0.3911563  -0.2330899  -0.04496036 -0.17565396\n",
      "  0.          0.        ]\n",
      "episode 4-step 42, taking action 0, observation [-0.17355767  1.4156926  -0.39115664 -0.25976303 -0.05374299 -0.17565304\n",
      "  0.          0.        ]\n",
      "episode 4-step 43, taking action 2, observation [-0.17762251  1.4099692  -0.39718115 -0.254734   -0.06294185 -0.18397698\n",
      "  0.          0.        ]\n",
      "episode 4-step 44, taking action 3, observation [-0.18159962  1.4036388  -0.38617727 -0.28187722 -0.07435125 -0.2281882\n",
      "  0.          0.        ]\n",
      "episode 4-step 45, taking action 0, observation [-0.18557663  1.3967104  -0.3861784  -0.30855474 -0.08576057 -0.22818618\n",
      "  0.          0.        ]\n",
      "episode 4-step 46, taking action 2, observation [-0.18951455  1.3906204  -0.38207787 -0.27138394 -0.09737017 -0.23219232\n",
      "  0.          0.        ]\n",
      "episode 4-step 47, taking action 2, observation [-0.1935155   1.3845612  -0.38791418 -0.2701286  -0.1094595  -0.24178663\n",
      "  0.          0.        ]\n",
      "episode 4-step 48, taking action 3, observation [-0.19745664  1.377902   -0.38044053 -0.29702362 -0.12304568 -0.27172384\n",
      "  0.          0.        ]\n",
      "episode 4-step 49, taking action 1, observation [-0.2014627   1.3706573  -0.38861996 -0.32301772 -0.13497464 -0.2385792\n",
      "  0.          0.        ]\n",
      "episode 4-step 50, taking action 2, observation [-0.20519051  1.3642281  -0.36150292 -0.2867993  -0.14621602 -0.22482741\n",
      "  0.          0.        ]\n",
      "episode 4-step 51, taking action 2, observation [-0.20893893  1.3587077  -0.36280945 -0.24657993 -0.15823016 -0.24028261\n",
      "  0.          0.        ]\n",
      "episode 4-step 52, taking action 3, observation [-0.21261434  1.3525839  -0.35370332 -0.2736914  -0.17207117 -0.27682045\n",
      "  0.          0.        ]\n",
      "episode 4-step 53, taking action 1, observation [-0.21637711  1.3458748  -0.36469537 -0.2995701  -0.18369213 -0.23241898\n",
      "  0.          0.        ]\n",
      "episode 4-step 54, taking action 0, observation [-0.22013974  1.3385674  -0.36469835 -0.32624796 -0.19531296 -0.23241682\n",
      "  0.          0.        ]\n",
      "episode 4-step 55, taking action 1, observation [-0.2239707   1.3306941  -0.3733746  -0.3512354  -0.20510566 -0.19585365\n",
      "  0.          0.        ]\n",
      "episode 4-step 56, taking action 2, observation [-0.22750525  1.3230724  -0.34455377 -0.34000364 -0.2140911  -0.17970891\n",
      "  0.          0.        ]\n",
      "episode 4-step 57, taking action 0, observation [-0.23103972  1.3148519  -0.34455582 -0.36667693 -0.2230765  -0.17970794\n",
      "  0.          0.        ]\n",
      "episode 4-step 58, taking action 3, observation [-0.2345088   1.3060119  -0.33636162 -0.39452186 -0.23376054 -0.21368098\n",
      "  0.          0.        ]\n",
      "episode 4-step 59, taking action 0, observation [-0.23797765  1.2965734  -0.33636484 -0.42119786 -0.2444445  -0.21367928\n",
      "  0.          0.        ]\n",
      "episode 4-step 60, taking action 1, observation [-0.24152598  1.2865741  -0.34645122 -0.44583094 -0.25298217 -0.17075306\n",
      "  0.          0.        ]\n",
      "episode 4-step 61, taking action 1, observation [-0.24515614  1.2760072  -0.3567716  -0.47072512 -0.259351   -0.12737648\n",
      "  0.          0.        ]\n",
      "episode 4-step 62, taking action 2, observation [-0.24837947  1.2661499  -0.3168667  -0.43908587 -0.26493695 -0.11171961\n",
      "  0.          0.        ]\n",
      "episode 4-step 63, taking action 3, observation [-0.2515258   1.2556558  -0.30714113 -0.4677804  -0.27260336 -0.15332839\n",
      "  0.          0.        ]\n",
      "episode 4-step 64, taking action 1, observation [-0.25474486  1.244592   -0.31633097 -0.49278346 -0.27832797 -0.11449236\n",
      "  0.          0.        ]\n",
      "episode 4-step 65, taking action 3, observation [-0.25788036  1.2328925  -0.30580574 -0.521469   -0.28628883 -0.15921721\n",
      "  0.          0.        ]\n",
      "episode 4-step 66, taking action 3, observation [-0.26095462  1.2205682  -0.29810768 -0.5496058  -0.29588526 -0.19192852\n",
      "  0.          0.        ]\n",
      "episode 4-step 67, taking action 0, observation [-0.26402855  1.207645   -0.298111   -0.57627994 -0.30548164 -0.19192734\n",
      "  0.          0.        ]\n",
      "episode 4-step 68, taking action 3, observation [-0.26702014  1.1940827  -0.2877456  -0.6052155  -0.31731537 -0.23667435\n",
      "  0.          0.        ]\n",
      "episode 4-step 69, taking action 2, observation [-0.269672    1.1806275  -0.2545948  -0.6003648  -0.32833663 -0.22042504\n",
      "  0.          0.        ]\n",
      "episode 4-step 70, taking action 2, observation [-0.27207035  1.1677893  -0.22918782 -0.5730429  -0.33945853 -0.22243837\n",
      "  0.          0.        ]\n",
      "episode 4-step 71, taking action 2, observation [-0.27433538  1.1556506  -0.2151784  -0.5422061  -0.35133374 -0.23750417\n",
      "  0.          0.        ]\n",
      "episode 4-step 72, taking action 3, observation [-0.27651477  1.1428568  -0.20435357 -0.57199484 -0.36565018 -0.28632864\n",
      "  0.          0.        ]\n",
      "episode 4-step 73, taking action 1, observation [-0.2787506   1.1295046  -0.21163222 -0.5965381  -0.37831762 -0.25334832\n",
      "  0.          0.        ]\n",
      "episode 4-step 74, taking action 2, observation [-0.28059402  1.1164968  -0.17301637 -0.5811773  -0.39037377 -0.24112359\n",
      "  0.          0.        ]\n",
      "episode 4-step 75, taking action 0, observation [-0.282437    1.1028906  -0.1730232  -0.6078552  -0.40242982 -0.24112114\n",
      "  0.          0.        ]\n",
      "episode 4-step 76, taking action 1, observation [-0.28436694  1.0887407  -0.18410823 -0.63143814 -0.4119865  -0.19113384\n",
      "  0.          0.        ]\n",
      "episode 4-step 77, taking action 3, observation [-0.28623444  1.073951   -0.17623417 -0.66041857 -0.4233379  -0.22702822\n",
      "  0.          0.        ]\n",
      "episode 4-step 78, taking action 0, observation [-0.2881015   1.058563   -0.17624071 -0.6870951  -0.43468922 -0.2270262\n",
      "  0.          0.        ]\n",
      "episode 4-step 79, taking action 1, observation [-0.29002362  1.0426207  -0.18335047 -0.7113286  -0.44436142 -0.19344327\n",
      "  0.          0.        ]\n",
      "episode 4-step 80, taking action 1, observation [-0.29201394  1.0261304  -0.19206305 -0.7351309  -0.45199263 -0.1526241\n",
      "  0.          0.        ]\n",
      "episode 4-step 81, taking action 3, observation [-0.29393712  1.0089977  -0.18361859 -0.76431    -0.46155933 -0.19133402\n",
      "  0.          0.        ]\n",
      "episode 4-step 82, taking action 0, observation [-0.29585996  0.9912658  -0.18362364 -0.79098356 -0.47112596 -0.19133283\n",
      "  0.          0.        ]\n",
      "episode 4-step 83, taking action 1, observation [-0.29785043  0.9729884  -0.19228555 -0.8146448  -0.47863123 -0.15010466\n",
      "  0.          0.        ]\n",
      "episode 4-step 84, taking action 3, observation [-0.29977998  0.9540539  -0.1844843  -0.8445009  -0.48807833 -0.18894169\n",
      "  0.          0.        ]\n",
      "episode 4-step 85, taking action 2, observation [-0.30142695  0.9349135  -0.15678447 -0.8535198  -0.49694064 -0.1772465\n",
      "  0.          0.        ]\n",
      "episode 4-step 86, taking action 2, observation [-0.30263194  0.91633016 -0.11271022 -0.8287708  -0.5057076  -0.1753397\n",
      "  0.          0.        ]\n",
      "episode 4-step 87, taking action 1, observation [-0.3039135   0.8972165  -0.12254043 -0.8515809  -0.51204616 -0.12677127\n",
      "  0.          0.        ]\n",
      "episode 4-step 88, taking action 1, observation [-0.3052753   0.87756014 -0.13269529 -0.8749345  -0.5159989  -0.07905483\n",
      "  0.          0.        ]\n",
      "episode 4-step 89, taking action 0, observation [-0.30663705  0.8573037  -0.13269626 -0.9016023  -0.51995164 -0.07905475\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 4-step 90, taking action 3, observation [-0.3079238   0.8363839  -0.12314955 -0.9318946  -0.52624404 -0.12584747\n",
      "  0.          0.        ]\n",
      "episode 4-step 91, taking action 3, observation [-0.30914408  0.81480044 -0.11465263 -0.96214855 -0.5346886  -0.16889061\n",
      "  0.          0.        ]\n",
      "episode 4-step 92, taking action 0, observation [-0.31036407  0.792618   -0.11465714 -0.9888204  -0.5431331  -0.16888979\n",
      "  0.          0.        ]\n",
      "episode 4-step 93, taking action 2, observation [-0.31127253  0.77069134 -0.08342905 -0.9775264  -0.5517087  -0.17151287\n",
      "  0.          0.        ]\n",
      "episode 4-step 94, taking action 3, observation [-0.31210572  0.74809355 -0.07383648 -1.0082746  -0.5627283  -0.22039235\n",
      "  0.          0.        ]\n",
      "episode 4-step 95, taking action 2, observation [-0.3125221   0.7256992  -0.03249011 -0.9991903  -0.57343954 -0.21422482\n",
      "  0.          0.        ]\n",
      "episode 4-step 96, taking action 0, observation [-0.31293797  0.70270604 -0.03249786 -1.0258651  -0.5841507  -0.2142231\n",
      "  0.          0.        ]\n",
      "episode 4-step 97, taking action 0, observation [-0.31335324  0.6791143  -0.03250573 -1.0525397  -0.59486175 -0.21422145\n",
      "  0.          0.        ]\n",
      "episode 4-step 98, taking action 1, observation [-0.31381583  0.654984   -0.03875262 -1.0758721  -0.60383004 -0.17936623\n",
      "  0.          0.        ]\n",
      "episode 4-step 99, taking action 1, observation [-0.31435522  0.6303326  -0.04862396 -1.0980814  -0.6102148  -0.1276961\n",
      "  0.          0.        ]\n",
      "episode 5-step 0, taking action 2, observation [-0.00549536  1.4256947  -0.27939537  0.347391    0.006127    0.05911131\n",
      "  0.          0.        ]\n",
      "episode 5-step 1, taking action 0, observation [-0.00825958  1.4329118  -0.2794047   0.32074812  0.00908056  0.0590764\n",
      "  0.          0.        ]\n",
      "episode 5-step 2, taking action 0, observation [-0.011024    1.439529   -0.27941346  0.29408285  0.01203369  0.05906789\n",
      "  0.          0.        ]\n",
      "episode 5-step 3, taking action 3, observation [-0.01370659  1.4455578  -0.26916736  0.26793954  0.01292769  0.01788157\n",
      "  0.          0.        ]\n",
      "episode 5-step 4, taking action 2, observation [-1.6579341e-02  1.4524839e+00 -2.8732294e-01  3.0781993e-01\n",
      "  1.2975132e-02  9.4924326e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 5, taking action 2, observation [-0.01962728  1.4598229  -0.3040443   0.3261887   0.01222812 -0.0149415\n",
      "  0.          0.        ]\n",
      "episode 5-step 6, taking action 3, observation [-0.02259293  1.466573   -0.29371175  0.3000287   0.00940825 -0.05640271\n",
      "  0.          0.        ]\n",
      "episode 5-step 7, taking action 0, observation [-0.02555838  1.4727232  -0.29370296  0.27336395  0.00658936 -0.05638279\n",
      "  0.          0.        ]\n",
      "episode 5-step 8, taking action 3, observation [-0.02843714  1.4782804  -0.28282627  0.2469974   0.00159093 -0.09997772\n",
      "  0.          0.        ]\n",
      "episode 5-step 9, taking action 1, observation [-3.1414032e-02  1.4832400e+00 -2.9512918e-01  2.2042315e-01\n",
      " -9.3659502e-04 -5.0555378e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 10, taking action 1, observation [-0.03445511  1.4875879  -0.30317366  0.19324002 -0.00185171 -0.01830409\n",
      "  0.          0.        ]\n",
      "episode 5-step 11, taking action 1, observation [-3.7567236e-02  1.4913464e+00 -3.1210357e-01  1.6704617e-01\n",
      " -9.7616157e-04  1.7512463e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 12, taking action 1, observation [-0.04076014  1.4945104  -0.32221532  0.1406239   0.00192508  0.05802993\n",
      "  0.          0.        ]\n",
      "episode 5-step 13, taking action 1, observation [-0.04403343  1.4970758  -0.33230618  0.11400476  0.00684594  0.09842579\n",
      "  0.          0.        ]\n",
      "episode 5-step 14, taking action 3, observation [-0.04721041  1.4990407  -0.3202389   0.08732075  0.00934448  0.04997561\n",
      "  0.          0.        ]\n",
      "episode 5-step 15, taking action 0, observation [-0.05038757  1.5004058  -0.3202452   0.0606516   0.01184411  0.04999708\n",
      "  0.          0.        ]\n",
      "episode 5-step 16, taking action 3, observation [-0.05350523  1.5011715  -0.3127785   0.03402125  0.01284507  0.02002128\n",
      "  0.          0.        ]\n",
      "episode 5-step 17, taking action 2, observation [-0.05658855  1.5025656  -0.3095619   0.06195236  0.01405601  0.02422098\n",
      "  0.          0.        ]\n",
      "episode 5-step 18, taking action 2, observation [-0.05986204  1.5041901  -0.32770905  0.07219338  0.01441439  0.00716828\n",
      "  0.          0.        ]\n",
      "episode 5-step 19, taking action 1, observation [-0.06323395  1.5052129  -0.3400642   0.04542637  0.01724904  0.05669828\n",
      "  0.          0.        ]\n",
      "episode 5-step 20, taking action 2, observation [-0.06661367  1.5063167  -0.34083566  0.04902601  0.0200827   0.05667821\n",
      "  0.          0.        ]\n",
      "episode 5-step 21, taking action 1, observation [-0.07006121  1.50681    -0.349352    0.02184958  0.02462447  0.09084331\n",
      "  0.          0.        ]\n",
      "episode 5-step 22, taking action 3, observation [-0.0734212   1.5067118  -0.33835185 -0.00439931  0.02695546  0.04662397\n",
      "  0.          0.        ]\n",
      "episode 5-step 23, taking action 3, observation [-0.0767106   1.5060108  -0.32951072 -0.03117269  0.02751707  0.01123339\n",
      "  0.          0.        ]\n",
      "episode 5-step 24, taking action 2, observation [-0.08009891  1.5059451  -0.33903682 -0.0029234   0.02772614  0.0041812\n",
      "  0.          0.        ]\n",
      "episode 5-step 25, taking action 1, observation [-0.08357029  1.5052843  -0.3494537  -0.02940951  0.03001864  0.04584975\n",
      "  0.          0.        ]\n",
      "episode 5-step 26, taking action 3, observation [-0.08698187  1.5040152  -0.34196848 -0.05641995  0.0308167   0.01596121\n",
      "  0.          0.        ]\n",
      "episode 5-step 27, taking action 1, observation [-0.09048118  1.5021486  -0.3529731  -0.08301768  0.03381674  0.06000076\n",
      "  0.          0.        ]\n",
      "episode 5-step 28, taking action 2, observation [-0.09414244  1.5009327  -0.36852667 -0.05409842  0.03619876  0.04764048\n",
      "  0.          0.        ]\n",
      "episode 5-step 29, taking action 0, observation [-0.09780359  1.4991169  -0.36852664 -0.08076557  0.03858078  0.04764047\n",
      "  0.          0.        ]\n",
      "episode 5-step 30, taking action 0, observation [-0.10146475  1.4967011  -0.36852664 -0.10743271  0.04096281  0.04764044\n",
      "  0.          0.        ]\n",
      "episode 5-step 31, taking action 3, observation [-0.10505734  1.4936844  -0.3599227  -0.1340961   0.04162316  0.01320705\n",
      "  0.          0.        ]\n",
      "episode 5-step 32, taking action 1, observation [-0.10871334  1.4900706  -0.36788195 -0.16068457  0.04387483  0.04503363\n",
      "  0.          0.        ]\n",
      "episode 5-step 33, taking action 3, observation [-0.11230316  1.4858614  -0.35958287 -0.18709005  0.04446176  0.01173841\n",
      "  0.          0.        ]\n",
      "episode 5-step 34, taking action 3, observation [-0.11581717  1.4810553  -0.35005918 -0.21356408  0.04314007 -0.02643379\n",
      "  0.          0.        ]\n",
      "episode 5-step 35, taking action 2, observation [-0.11952333  1.4764714  -0.3684925  -0.20366904  0.04103907 -0.04201996\n",
      "  0.          0.        ]\n",
      "episode 5-step 36, taking action 0, observation [-0.1232295   1.4712875  -0.36849248 -0.2303361   0.03893807 -0.04201994\n",
      "  0.          0.        ]\n",
      "episode 5-step 37, taking action 1, observation [-0.12702894  1.4654962  -0.3801974  -0.25739878  0.03918524  0.00494309\n",
      "  0.          0.        ]\n",
      "episode 5-step 38, taking action 2, observation [-0.13100204  1.4599047  -0.39683908 -0.24849965  0.03872649 -0.00917509\n",
      "  0.          0.        ]\n",
      "episode 5-step 39, taking action 2, observation [-0.13511944  1.4545703  -0.41070256 -0.2370569   0.0377025  -0.02047961\n",
      "  0.          0.        ]\n",
      "episode 5-step 40, taking action 0, observation [-0.13923684  1.4486359  -0.41070256 -0.26372367  0.0366785  -0.0204797\n",
      "  0.          0.        ]\n",
      "episode 5-step 41, taking action 1, observation [-0.14344263  1.4420884  -0.42181405 -0.29103816  0.03788728  0.02417562\n",
      "  0.          0.        ]\n",
      "episode 5-step 42, taking action 2, observation [-0.1476678   1.4359376  -0.42376262 -0.27339566  0.03912348  0.02472384\n",
      "  0.          0.        ]\n",
      "episode 5-step 43, taking action 3, observation [-0.15181379  1.4291999  -0.41382688 -0.29943213  0.03836205 -0.01522874\n",
      "  0.          0.        ]\n",
      "episode 5-step 44, taking action 3, observation [-0.15589914  1.4218711  -0.40622106 -0.32567173  0.03607231 -0.04579445\n",
      "  0.          0.        ]\n",
      "episode 5-step 45, taking action 0, observation [-0.1599845   1.4139421  -0.406221   -0.35233885  0.03378259 -0.04579443\n",
      "  0.          0.        ]\n",
      "episode 5-step 46, taking action 1, observation [-0.16414824  1.405403   -0.41604838 -0.37951416  0.03346635 -0.00632488\n",
      "  0.          0.        ]\n",
      "episode 5-step 47, taking action 1, observation [-0.168371    1.3962513  -0.42348075 -0.4067691   0.03464518  0.0235767\n",
      "  0.          0.        ]\n",
      "episode 5-step 48, taking action 3, observation [-0.17252064  1.3865062  -0.41428095 -0.43310425  0.03397838 -0.01333598\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 5-step 49, taking action 2, observation [-0.176828    1.3770233  -0.42940623 -0.42143163  0.03267246 -0.0261185\n",
      "  0.          0.        ]\n",
      "episode 5-step 50, taking action 0, observation [-0.18113537  1.3669405  -0.42940623 -0.44809848  0.03136656 -0.02611832\n",
      "  0.          0.        ]\n",
      "episode 5-step 51, taking action 3, observation [-0.18534708  1.3562657  -0.41742125 -0.47436234  0.02765685 -0.07419401\n",
      "  0.          0.        ]\n",
      "episode 5-step 52, taking action 1, observation [-0.18965062  1.3449857  -0.4289216  -0.50129956  0.02625223 -0.02809263\n",
      "  0.          0.        ]\n",
      "episode 5-step 53, taking action 0, observation [-0.19395408  1.333106   -0.4289216  -0.52796644  0.02484761 -0.02809263\n",
      "  0.          0.        ]\n",
      "episode 5-step 54, taking action 2, observation [-0.19822149  1.3214474  -0.42554387 -0.5181361   0.02367266 -0.02349924\n",
      "  0.          0.        ]\n",
      "episode 5-step 55, taking action 3, observation [-0.20241666  1.3091956  -0.41649526 -0.54447615  0.02068313 -0.05979031\n",
      "  0.          0.        ]\n",
      "episode 5-step 56, taking action 1, observation [-0.2066718   1.2963362  -0.42401475 -0.5715118   0.01920208 -0.02962101\n",
      "  0.          0.        ]\n",
      "episode 5-step 57, taking action 0, observation [-0.21092692  1.2828768  -0.42401475 -0.5981786   0.01772103 -0.02962101\n",
      "  0.          0.        ]\n",
      "episode 5-step 58, taking action 1, observation [-0.21526413  1.268818   -0.43430838 -0.6248469   0.01830109  0.01160148\n",
      "  0.          0.        ]\n",
      "episode 5-step 59, taking action 1, observation [-0.21966295  1.2541558  -0.4420375  -0.65168226  0.02042994  0.04257699\n",
      "  0.          0.        ]\n",
      "episode 5-step 60, taking action 1, observation [-0.22412744  1.2388897  -0.45027885 -0.67854667  0.02421044  0.07561012\n",
      "  0.          0.        ]\n",
      "episode 5-step 61, taking action 3, observation [-0.2285194   1.2230384  -0.44117022 -0.70453584  0.02616111  0.03901317\n",
      "  0.          0.        ]\n",
      "episode 5-step 62, taking action 3, observation [-2.3283167e-01  1.2065825e+00 -4.3118429e-01 -7.3137414e-01\n",
      "  2.6114315e-02 -9.3590206e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 63, taking action 0, observation [-2.3714399e-01  1.1895266e+00 -4.3118435e-01 -7.5804079e-01\n",
      "  2.6067520e-02 -9.3598070e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 64, taking action 3, observation [-0.24135819  1.1718842  -0.41886955 -0.78406066  0.02354893 -0.0503718\n",
      "  0.          0.        ]\n",
      "episode 5-step 65, taking action 2, observation [-0.24542984  1.1543133  -0.40533137 -0.78089875  0.02173822 -0.03621415\n",
      "  0.          0.        ]\n",
      "episode 5-step 66, taking action 0, observation [-0.24950151  1.1361425  -0.4053313  -0.8075656   0.01992751 -0.03621428\n",
      "  0.          0.        ]\n",
      "episode 5-step 67, taking action 0, observation [-0.25357312  1.1173718  -0.40533128 -0.8342325   0.0181168  -0.03621428\n",
      "  0.          0.        ]\n",
      "episode 5-step 68, taking action 3, observation [-0.25755358  1.0980035  -0.3938873  -0.86076677  0.01401372 -0.08206156\n",
      "  0.          0.        ]\n",
      "episode 5-step 69, taking action 0, observation [-0.26153398  1.0780356  -0.39388725 -0.8874349   0.00991066 -0.08206134\n",
      "  0.          0.        ]\n",
      "episode 5-step 70, taking action 0, observation [-0.26551443  1.0574677  -0.39388722 -0.9141029   0.0058076  -0.08206127\n",
      "  0.          0.        ]\n",
      "episode 5-step 71, taking action 2, observation [-2.6965556e-01  1.0369377e+00 -4.0921554e-01 -9.1243458e-01\n",
      "  9.7236125e-04 -9.6704856e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 72, taking action 2, observation [-0.27395815  1.0168376  -0.4246053  -0.89334416 -0.0046069  -0.11158516\n",
      "  0.          0.        ]\n",
      "episode 5-step 73, taking action 0, observation [-0.2782607   0.9961378  -0.4246053  -0.9200134  -0.01018615 -0.11158492\n",
      "  0.          0.        ]\n",
      "episode 5-step 74, taking action 1, observation [-0.2826394   0.9748298  -0.43415403 -0.94705397 -0.01385463 -0.07336967\n",
      "  0.          0.        ]\n",
      "episode 5-step 75, taking action 1, observation [-0.2870969   0.95292896 -0.44403648 -0.9733916  -0.01554245 -0.03375632\n",
      "  0.          0.        ]\n",
      "episode 5-step 76, taking action 0, observation [-0.2915543   0.93042785 -0.44403642 -1.0000585  -0.01723025 -0.03375607\n",
      "  0.          0.        ]\n",
      "episode 5-step 77, taking action 0, observation [-0.29601178  0.9073271  -0.44403648 -1.0267254  -0.01891806 -0.03375604\n",
      "  0.          0.        ]\n",
      "episode 5-step 78, taking action 3, observation [-0.3003909   0.88361293 -0.43420163 -1.0540134  -0.02257954 -0.07322987\n",
      "  0.          0.        ]\n",
      "episode 5-step 79, taking action 0, observation [-0.30477     0.85929894 -0.43420166 -1.0806812  -0.02624103 -0.07322978\n",
      "  0.          0.        ]\n",
      "episode 5-step 80, taking action 1, observation [-0.30923486  0.83437914 -0.44495472 -1.1075782  -0.02775208 -0.03022106\n",
      "  0.          0.        ]\n",
      "episode 5-step 81, taking action 1, observation [-0.31376314  0.8088624  -0.45290798 -1.1340687  -0.02766912  0.00165924\n",
      "  0.          0.        ]\n",
      "episode 5-step 82, taking action 2, observation [-0.31842232  0.78340334 -0.46533075 -1.1315271  -0.02824791 -0.01157584\n",
      "  0.          0.        ]\n",
      "episode 5-step 83, taking action 2, observation [-0.32323274  0.7583316  -0.47966284 -1.1143224  -0.02961254 -0.02729289\n",
      "  0.          0.        ]\n",
      "episode 5-step 84, taking action 1, observation [-0.32810622  0.7326589  -0.4875594  -1.1410078  -0.02939669  0.00431705\n",
      "  0.          0.        ]\n",
      "episode 5-step 85, taking action 3, observation [-0.33291203  0.7063757  -0.4790725  -1.1681657  -0.0308852  -0.02977045\n",
      "  0.          0.        ]\n",
      "episode 5-step 86, taking action 0, observation [-0.33771783  0.67949265 -0.4790725  -1.1948324  -0.03237375 -0.02977075\n",
      "  0.          0.        ]\n",
      "episode 5-step 87, taking action 1, observation [-0.34260225  0.6520107  -0.48893428 -1.2214134  -0.03188731  0.00972878\n",
      "  0.          0.        ]\n",
      "episode 5-step 88, taking action 3, observation [-0.3474059   0.6239156  -0.4788035  -1.2487055  -0.03343625 -0.03097878\n",
      "  0.          0.        ]\n",
      "episode 5-step 89, taking action 1, observation [-3.5227194e-01  5.9523010e-01 -4.8662201e-01 -1.2749075e+00\n",
      " -3.3414401e-02  4.3700897e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 90, taking action 0, observation [-3.5713792e-01  5.6594467e-01 -4.8662201e-01 -1.3015741e+00\n",
      " -3.3392556e-02  4.3700644e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 5-step 91, taking action 2, observation [-0.36191487  0.5375597  -0.47797626 -1.2615501  -0.0331138   0.00557494\n",
      "  0.          0.        ]\n",
      "episode 5-step 92, taking action 0, observation [-0.36669183  0.50857466 -0.47797626 -1.2882167  -0.03283506  0.00557498\n",
      "  0.          0.        ]\n",
      "episode 5-step 93, taking action 2, observation [-0.3715952   0.4804262  -0.4898808  -1.2510507  -0.03328679 -0.00903458\n",
      "  0.          0.        ]\n",
      "episode 5-step 94, taking action 2, observation [-0.37642217  0.45287663 -0.48247248 -1.2244315  -0.0335086  -0.00443624\n",
      "  0.          0.        ]\n",
      "episode 5-step 95, taking action 3, observation [-0.38115185  0.42471635 -0.47026858 -1.2516285  -0.03617981 -0.05342412\n",
      "  0.          0.        ]\n",
      "episode 5-step 96, taking action 2, observation [-0.38582182  0.39668706 -0.4644873  -1.2458092  -0.03865635 -0.04953087\n",
      "  0.          0.        ]\n",
      "episode 5-step 97, taking action 2, observation [-0.39055365  0.36953717 -0.47021073 -1.2067419  -0.04159586 -0.05879023\n",
      "  0.          0.        ]\n",
      "episode 5-step 98, taking action 3, observation [-0.39520782  0.3417831  -0.46046108 -1.2336597  -0.04649015 -0.09788577\n",
      "  0.          0.        ]\n",
      "episode 5-step 99, taking action 2, observation [-0.39969397  0.31406122 -0.44435376 -1.232221   -0.05069874 -0.08417197\n",
      "  0.          0.        ]\n",
      "episode 6-step 0, taking action 0, observation [-0.00518513  1.411742   -0.26223892  0.00541947  0.00594995  0.05879085\n",
      "  0.          0.        ]\n",
      "episode 6-step 1, taking action 1, observation [-0.00783901  1.4112558  -0.2699188  -0.02162774  0.0104266   0.08954132\n",
      "  0.          0.        ]\n",
      "episode 6-step 2, taking action 2, observation [-0.01056547  1.4116642  -0.27685872  0.01811042  0.01460498  0.08357517\n",
      "  0.          0.        ]\n",
      "episode 6-step 3, taking action 2, observation [-0.01328554  1.413025   -0.2763401   0.06043253  0.01888569  0.08562247\n",
      "  0.          0.        ]\n",
      "episode 6-step 4, taking action 0, observation [-0.0160058   1.4137858  -0.27635252  0.03375627  0.02316559  0.0856056\n",
      "  0.          0.        ]\n",
      "episode 6-step 5, taking action 3, observation [-0.01865425  1.4139447  -0.26734596  0.00702169  0.02563751  0.04944304\n",
      "  0.          0.        ]\n",
      "episode 6-step 6, taking action 1, observation [-0.02139607  1.4134986  -0.27905646 -0.01991323  0.03045807  0.09642039\n",
      "  0.          0.        ]\n",
      "episode 6-step 7, taking action 3, observation [-0.02406082  1.412449   -0.26938266 -0.04671601  0.03333663  0.05757611\n",
      "  0.          0.        ]\n",
      "episode 6-step 8, taking action 3, observation [-0.0266614   1.4108013  -0.26133734 -0.07325602  0.03460105  0.02529094\n",
      "  0.          0.        ]\n",
      "episode 6-step 9, taking action 1, observation [-0.02934046  1.408555   -0.2711631  -0.09991754  0.03783324  0.06464936\n",
      "  0.          0.        ]\n",
      "episode 6-step 10, taking action 0, observation [-0.03201962  1.4057087  -0.27117318 -0.12658702  0.04106374  0.06461561\n",
      "  0.          0.        ]\n",
      "episode 6-step 11, taking action 2, observation [-0.03466044  1.4036201  -0.26770592 -0.09293477  0.04465065  0.07174459\n",
      "  0.          0.        ]\n",
      "episode 6-step 12, taking action 3, observation [-0.03723822  1.4009354  -0.25980097 -0.11938212  0.04664814  0.03995356\n",
      "  0.          0.        ]\n",
      "episode 6-step 13, taking action 1, observation [-0.03988733  1.3976476  -0.2687356  -0.1462462   0.05043723  0.07578859\n",
      "  0.          0.        ]\n",
      "episode 6-step 14, taking action 2, observation [-0.04268608  1.3952785  -0.28322566 -0.10541181  0.0537623   0.06650738\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 6-step 15, taking action 1, observation [-0.0455451   1.3923061  -0.29079455 -0.13228662  0.05860453  0.09685314\n",
      "  0.          0.        ]\n",
      "episode 6-step 16, taking action 3, observation [-0.0483285   1.3887492  -0.28127944 -0.15820049  0.0615222   0.05835867\n",
      "  0.          0.        ]\n",
      "episode 6-step 17, taking action 1, observation [-0.05120888  1.3845851  -0.29345402 -0.18529703  0.06688374  0.10724036\n",
      "  0.          0.        ]\n",
      "episode 6-step 18, taking action 0, observation [-0.05408955  1.3798215  -0.29347005 -0.21196868  0.07224306  0.10719603\n",
      "  0.          0.        ]\n",
      "episode 6-step 19, taking action 1, observation [-0.05705299  1.3744425  -0.30385727 -0.2394512   0.07969698  0.14909165\n",
      "  0.          0.        ]\n",
      "episode 6-step 20, taking action 2, observation [-0.06020584  1.3697388  -0.32222554 -0.20943579  0.08658547  0.13778248\n",
      "  0.          0.        ]\n",
      "episode 6-step 21, taking action 1, observation [-0.06344614  1.3644347  -0.33317795 -0.23628174  0.09566154  0.18153767\n",
      "  0.          0.        ]\n",
      "episode 6-step 22, taking action 0, observation [-0.06668682  1.358532   -0.33320397 -0.2629541   0.10473509  0.18148722\n",
      "  0.          0.        ]\n",
      "episode 6-step 23, taking action 0, observation [-0.06992779  1.3520302  -0.33322817 -0.28963062  0.11380772  0.18146871\n",
      "  0.          0.        ]\n",
      "episode 6-step 24, taking action 1, observation [-0.07324286  1.3448838  -0.34257773 -0.3185009   0.12482143  0.22027385\n",
      "  0.          0.        ]\n",
      "episode 6-step 25, taking action 3, observation [-0.07648611  1.3371508  -0.33355537 -0.34448573  0.13401005  0.1837723\n",
      "  0.          0.        ]\n",
      "episode 6-step 26, taking action 1, observation [-0.07980718  1.3287934  -0.34334153 -0.3725011   0.145209    0.223979\n",
      "  0.          0.        ]\n",
      "episode 6-step 27, taking action 0, observation [-0.08312845  1.3198373  -0.34333932 -0.39917815  0.15640786  0.22397704\n",
      "  0.          0.        ]\n",
      "episode 6-step 28, taking action 3, observation [-0.08636961  1.3102912  -0.3333037  -0.42527723  0.16558795  0.18360199\n",
      "  0.          0.        ]\n",
      "episode 6-step 29, taking action 0, observation [-0.08961096  1.3001459  -0.33330196 -0.4519509   0.17476797  0.1836008\n",
      "  0.          0.        ]\n",
      "episode 6-step 30, taking action 1, observation [-0.09293862  1.289372   -0.3441522  -0.4802168   0.18619289  0.22849783\n",
      "  0.          0.        ]\n",
      "episode 6-step 31, taking action 3, observation [-0.09619055  1.2780212  -0.33460954 -0.5056918   0.19565609  0.18926439\n",
      "  0.          0.        ]\n",
      "episode 6-step 32, taking action 3, observation [-0.09938021  1.2660866  -0.32676497 -0.53147334  0.20351313  0.1571407\n",
      "  0.          0.        ]\n",
      "episode 6-step 33, taking action 0, observation [-0.10256996  1.2535529  -0.32676342 -0.5581451   0.21137014  0.15714005\n",
      "  0.          0.        ]\n",
      "episode 6-step 34, taking action 0, observation [-0.10575981  1.24042    -0.3267618  -0.5848168   0.21922712  0.1571395\n",
      "  0.          0.        ]\n",
      "episode 6-step 35, taking action 2, observation [-0.10914278  1.2275426  -0.34576494 -0.5734584   0.2267856   0.15116975\n",
      "  0.          0.        ]\n",
      "episode 6-step 36, taking action 1, observation [-0.11258688  1.214036   -0.35348758 -0.601715    0.23598623  0.18401238\n",
      "  0.          0.        ]\n",
      "episode 6-step 37, taking action 0, observation [-0.11603117  1.1999307  -0.3534851  -0.62838846  0.24518679  0.18401131\n",
      "  0.          0.        ]\n",
      "episode 6-step 38, taking action 0, observation [-0.11947556  1.1852262  -0.35348254 -0.65506196  0.2543873   0.18401024\n",
      "  0.          0.        ]\n",
      "episode 6-step 39, taking action 3, observation [-0.1228384   1.1699451  -0.34321314 -0.6803802   0.2614692   0.14163776\n",
      "  0.          0.        ]\n",
      "episode 6-step 40, taking action 3, observation [-0.12613134  1.1541002  -0.3343678  -0.70513237  0.2666526   0.10366823\n",
      "  0.          0.        ]\n",
      "episode 6-step 41, taking action 1, observation [-0.12950984  1.1376243  -0.34510842 -0.73360753  0.27409092  0.14876579\n",
      "  0.          0.        ]\n",
      "episode 6-step 42, taking action 1, observation [-0.13296433  1.1205285  -0.3546181  -0.76156     0.2834937   0.1880556\n",
      "  0.          0.        ]\n",
      "episode 6-step 43, taking action 1, observation [-0.13649979  1.1027951  -0.36479726 -0.7903761   0.2950807   0.23173967\n",
      "  0.          0.        ]\n",
      "episode 6-step 44, taking action 0, observation [-0.14003554  1.0844636  -0.36479244 -0.81705356  0.30666757  0.23173746\n",
      "  0.          0.        ]\n",
      "episode 6-step 45, taking action 0, observation [-0.14357176  1.0655336  -0.36478746 -0.8437309   0.31825432  0.23173535\n",
      "  0.          0.        ]\n",
      "episode 6-step 46, taking action 2, observation [-0.147229    1.0467023  -0.3769895  -0.83947146  0.32998285  0.23457024\n",
      "  0.          0.        ]\n",
      "episode 6-step 47, taking action 3, observation [-0.15081921  1.0273141  -0.36842316 -0.86387646  0.3398109   0.19656119\n",
      "  0.          0.        ]\n",
      "episode 6-step 48, taking action 2, observation [-0.15482683  1.008754   -0.40980282 -0.8270522   0.34928563  0.18949446\n",
      "  0.          0.        ]\n",
      "episode 6-step 49, taking action 0, observation [-0.15883474  0.98959523 -0.409799   -0.85372597  0.35876027  0.18949327\n",
      "  0.          0.        ]\n",
      "episode 6-step 50, taking action 3, observation [-0.16276169  0.96988934 -0.3994879  -0.8775311   0.3659214   0.14322305\n",
      "  0.          0.        ]\n",
      "episode 6-step 51, taking action 0, observation [-0.16668892  0.94958395 -0.39948562 -0.9042016   0.37308255  0.14322259\n",
      "  0.          0.        ]\n",
      "episode 6-step 52, taking action 1, observation [-0.17067966  0.9286334  -0.4075675  -0.9333819   0.38209713  0.18029134\n",
      "  0.          0.        ]\n",
      "episode 6-step 53, taking action 2, observation [-0.17493725  0.9084505  -0.43462914 -0.8994305   0.39153805  0.18881862\n",
      "  0.          0.        ]\n",
      "episode 6-step 54, taking action 3, observation [-0.1791133   0.8877268  -0.42419463 -0.9228814   0.3985833   0.14090529\n",
      "  0.          0.        ]\n",
      "episode 6-step 55, taking action 0, observation [-0.18328962  0.86640376 -0.42419225 -0.94955194  0.40562853  0.14090481\n",
      "  0.          0.        ]\n",
      "episode 6-step 56, taking action 1, observation [-0.18753882  0.84442234 -0.43351096 -0.9794359   0.41486844  0.18479756\n",
      "  0.          0.        ]\n",
      "episode 6-step 57, taking action 0, observation [-0.1917882   0.82184213 -0.43350667 -1.0061091   0.42410824  0.18479647\n",
      "  0.          0.        ]\n",
      "episode 6-step 58, taking action 3, observation [-0.19595031  0.7987268  -0.422342   -1.0292121   0.4307558   0.13295129\n",
      "  0.          0.        ]\n",
      "episode 6-step 59, taking action 2, observation [-0.20034337  0.77562994 -0.44526234 -1.0283684   0.43722862  0.12945704\n",
      "  0.          0.        ]\n",
      "episode 6-step 60, taking action 3, observation [-0.20467845  0.7519657  -0.4379394  -1.0531363   0.44206932  0.09681428\n",
      "  0.          0.        ]\n",
      "episode 6-step 61, taking action 1, observation [-0.2090797   0.72765684 -0.44631833 -1.0823586   0.4488396   0.13540551\n",
      "  0.          0.        ]\n",
      "episode 6-step 62, taking action 3, observation [-0.21342596  0.7027924  -0.43928123 -1.1065941   0.45394138  0.10203578\n",
      "  0.          0.        ]\n",
      "episode 6-step 63, taking action 2, observation [-0.21803065  0.67859805 -0.4656871  -1.0770154   0.45968664  0.11490544\n",
      "  0.          0.        ]\n",
      "episode 6-step 64, taking action 0, observation [-0.22263542  0.653804   -0.46568522 -1.1036845   0.46543193  0.11490557\n",
      "  0.          0.        ]\n",
      "episode 6-step 65, taking action 2, observation [-0.22771159  0.629419   -0.5123115  -1.0853574   0.47062472  0.10385563\n",
      "  0.          0.        ]\n",
      "episode 6-step 66, taking action 3, observation [-0.23273715  0.6044793  -0.505812   -1.1095433   0.47423214  0.0721487\n",
      "  0.          0.        ]\n",
      "episode 6-step 67, taking action 2, observation [-0.23806262  0.58015144 -0.5362203  -1.0825039   0.4783117   0.08159107\n",
      "  0.          0.        ]\n",
      "episode 6-step 68, taking action 3, observation [-0.24331012  0.5552758  -0.5263654  -1.1061442   0.48011315  0.03602909\n",
      "  0.          0.        ]\n",
      "episode 6-step 69, taking action 0, observation [-0.24855757  0.5298002  -0.52636516 -1.1328111   0.48191455  0.03602861\n",
      "  0.          0.        ]\n",
      "episode 6-step 70, taking action 0, observation [-0.25380507  0.50372463 -0.526365   -1.159478    0.48371604  0.03602921\n",
      "  0.          0.        ]\n",
      "episode 6-step 71, taking action 2, observation [-0.25957543  0.4779904  -0.5779394  -1.1440617   0.48472607  0.0202009\n",
      "  0.          0.        ]\n",
      "episode 6-step 72, taking action 1, observation [-0.26539555  0.45160735 -0.5843526  -1.1734067   0.4873443   0.05236428\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 6-step 73, taking action 1, observation [-0.27127337  0.42459092 -0.59158576 -1.2020783   0.4915967   0.08504798\n",
      "  0.          0.        ]\n",
      "episode 6-step 74, taking action 0, observation [-0.2771513   0.3969748  -0.5915847  -1.2287464   0.49584907  0.08504777\n",
      "  0.          0.        ]\n",
      "episode 6-step 75, taking action 0, observation [-0.28302926  0.36875883 -0.5915836  -1.2554144   0.50010145  0.08504769\n",
      "  0.          0.        ]\n",
      "episode 6-step 76, taking action 1, observation [-0.28897357  0.33989877 -0.59992874 -1.2846848   0.5062917   0.12380467\n",
      "  0.          0.        ]\n",
      "episode 6-step 77, taking action 3, observation [-0.2948621   0.31048816 -0.592775   -1.3085961   0.51072407  0.0886472\n",
      "  0.          0.        ]\n",
      "episode 6-step 78, taking action 2, observation [-0.301085    0.28133005 -0.6261423  -1.2973624   0.5150921   0.08736067\n",
      "  0.          0.        ]\n",
      "episode 6-step 79, taking action 2, observation [-0.30769864  0.25213838 -0.66458327 -1.2986286   0.51874727  0.07310378\n",
      "  0.          0.        ]\n",
      "episode 6-step 80, taking action 3, observation [-0.31423792  0.22240685 -0.6551338  -1.3218598   0.5201157   0.02736825\n",
      "  0.          0.        ]\n",
      "episode 6-step 81, taking action 0, observation [-0.32077724  0.19207536 -0.6551336  -1.3485266   0.5214841   0.02736809\n",
      "  0.          0.        ]\n",
      "episode 6-step 82, taking action 2, observation [-0.32762712  0.16242622 -0.6868273  -1.3184441   0.5235765   0.04184821\n",
      "  0.          0.        ]\n",
      "episode 6-step 83, taking action 2, observation [-0.33475685  0.13320859 -0.71521944 -1.2994275   0.52613807  0.05123125\n",
      "  0.          0.        ]\n",
      "episode 6-step 84, taking action 0, observation [-0.3418866   0.10339101 -0.715219   -1.3260945   0.52869964  0.05123166\n",
      "  0.          0.        ]\n",
      "episode 6-step 85, taking action 2, observation [-0.34957036  0.07410759 -0.7702255  -1.3022085   0.530812    0.04224823\n",
      "  0.          0.        ]\n",
      "episode 6-step 86, taking action 1, observation [-0.35732883  0.04416462 -0.77968913 -1.3323113   0.53521574  0.08807408\n",
      "  0.          0.        ]\n",
      "episode 6-step 87, taking action 0, observation [-0.36508742  0.01362184 -0.7796878  -1.3589793   0.53961945  0.0880737\n",
      "  0.          0.        ]\n",
      "episode 6-step 88, taking action 2, observation [-0.37337583 -0.01626812 -0.8325635  -1.3299338   0.5439051   0.08571281\n",
      "  0.          1.        ]\n",
      "episode 6-step 89, taking action 1, observation [-0.38136306 -0.04487871 -0.78967845 -1.2678634   0.536421   -0.14589408\n",
      "  0.          1.        ]\n",
      "episode 6-step 90, taking action 0, observation [-0.3896357  -0.07359476 -0.8006915  -1.2665702   0.5103231  -0.5171577\n",
      "  0.          1.        ]\n",
      "episode 6-step 91, taking action 2, observation [-0.3991143  -0.08482203 -0.7581919  -0.4440674   0.30552942 -4.0917068\n",
      "  1.          0.        ]\n",
      "episode 6-step 92, taking action 3, observation [-0.4085649  -0.09552168 -0.75649303 -0.44866437  0.11849514 -3.736145\n",
      "  1.          0.        ]\n",
      "episode 6-step 93, taking action 0, observation [-0.41710743 -0.10029673 -0.81978416 -0.12230621  0.07421335  0.02625216\n",
      "  1.          0.        ]\n",
      "episode 6-step 94, taking action 1, observation [-0.42536855 -0.10291367 -0.83243954 -0.12168961  0.08136746  0.23474982\n",
      "  1.          0.        ]\n",
      "episode 6-step 95, taking action 0, observation [-0.43350068 -0.10595386 -0.83383787 -0.13664415  0.10185973  0.41417757\n",
      "  1.          0.        ]\n",
      "episode 6-step 96, taking action 1, observation [-0.44207653 -0.10667323 -0.83761454 -0.05203077  0.08339348 -0.15776779\n",
      "  1.          0.        ]\n",
      "episode 6-step 97, taking action 3, observation [-0.4504242  -0.10770438 -0.82877284 -0.04571864  0.07717856 -0.12216829\n",
      "  1.          0.        ]\n",
      "episode 6-step 98, taking action 2, observation [-0.45875034 -0.1083283  -0.83129203 -0.02786554  0.07587343 -0.02406382\n",
      "  1.          0.        ]\n",
      "episode 6-step 99, taking action 0, observation [-0.4670814  -0.10905514 -0.8305833  -0.03234099  0.07336941 -0.04845913\n",
      "  1.          0.        ]\n",
      "episode 7-step 0, taking action 1, observation [-0.00850945  1.4118882  -0.43705097  0.00881732  0.01190209  0.14035451\n",
      "  0.          0.        ]\n",
      "episode 7-step 1, taking action 0, observation [-0.01280918  1.4114885  -0.4370728  -0.01784164  0.01891493  0.14027023\n",
      "  0.          0.        ]\n",
      "episode 7-step 2, taking action 3, observation [-0.01704903  1.4104867  -0.4295419  -0.04460227  0.02441375  0.10998641\n",
      "  0.          0.        ]\n",
      "episode 7-step 3, taking action 1, observation [-0.021385    1.4088744  -0.44161096 -0.07181361  0.03233368  0.1584133\n",
      "  0.          0.        ]\n",
      "episode 7-step 4, taking action 0, observation [-0.02572126  1.4066628  -0.4416356  -0.0984863   0.04025096  0.15836012\n",
      "  0.          0.        ]\n",
      "episode 7-step 5, taking action 0, observation [-0.03005781  1.403852   -0.44165945 -0.12515925  0.04816747  0.15834501\n",
      "  0.          0.        ]\n",
      "episode 7-step 6, taking action 1, observation [-0.03448915  1.4004257  -0.45354342 -0.15264304  0.05847291  0.20612812\n",
      "  0.          0.        ]\n",
      "episode 7-step 7, taking action 2, observation [-0.03913288  1.3971763  -0.4738956  -0.14482173  0.06791263  0.18881184\n",
      "  0.          0.        ]\n",
      "episode 7-step 8, taking action 2, observation [-0.04382505  1.3938887  -0.47862706 -0.14655899  0.07724439  0.18665203\n",
      "  0.          0.        ]\n",
      "episode 7-step 9, taking action 2, observation [-0.04877472  1.3915014  -0.5035163  -0.10656419  0.0857236   0.16959973\n",
      "  0.          0.        ]\n",
      "episode 7-step 10, taking action 2, observation [-0.05394421  1.389289   -0.52466714 -0.09877948  0.09337533  0.15304886\n",
      "  0.          0.        ]\n",
      "episode 7-step 11, taking action 3, observation [-0.05905018  1.3864852  -0.5166686  -0.12500785  0.0994101   0.12070625\n",
      "  0.          0.        ]\n",
      "episode 7-step 12, taking action 0, observation [-0.06415634  1.3830819  -0.5166849  -0.15167691  0.10544426  0.12069412\n",
      "  0.          0.        ]\n",
      "episode 7-step 13, taking action 0, observation [-0.0692626   1.3790789  -0.51670194 -0.17834899  0.11147714  0.12066869\n",
      "  0.          0.        ]\n",
      "episode 7-step 14, taking action 0, observation [-0.07436924  1.3744763  -0.5167189  -0.20502158  0.11750907  0.12064983\n",
      "  0.          0.        ]\n",
      "episode 7-step 15, taking action 1, observation [-0.07953606  1.3692617  -0.52427566 -0.23236966  0.1250708   0.15124826\n",
      "  0.          0.        ]\n",
      "episode 7-step 16, taking action 1, observation [-0.08479013  1.3634429  -0.5351874  -0.25946328  0.13481577  0.19491735\n",
      "  0.          0.        ]\n",
      "episode 7-step 17, taking action 1, observation [-0.09011183  1.3570156  -0.5436348  -0.28673157  0.14626037  0.2289124\n",
      "  0.          0.        ]\n",
      "episode 7-step 18, taking action 1, observation [-0.09551287  1.3499764  -0.5535432  -0.31423283  0.15970626  0.26894215\n",
      "  0.          0.        ]\n",
      "episode 7-step 19, taking action 2, observation [-0.10085335  1.3431327  -0.54814076 -0.30573434  0.173823    0.28236014\n",
      "  0.          0.        ]\n",
      "episode 7-step 20, taking action 1, observation [-0.10628939  1.3356651  -0.5600809  -0.33390522  0.19038309  0.33123195\n",
      "  0.          0.        ]\n",
      "episode 7-step 21, taking action 2, observation [-0.11181536  1.3290008  -0.569432   -0.2984428   0.20735396  0.3394484\n",
      "  0.          0.        ]\n",
      "episode 7-step 22, taking action 1, observation [-0.11741457  1.321724   -0.578507   -0.32613164  0.22617011  0.3763565\n",
      "  0.          0.        ]\n",
      "episode 7-step 23, taking action 3, observation [-0.12293549  1.3138816  -0.56855226 -0.3511695   0.24288964  0.33442086\n",
      "  0.          0.        ]\n",
      "episode 7-step 24, taking action 0, observation [-0.12845393  1.3054217  -0.56824327 -0.3787937   0.25960883  0.33438402\n",
      "  0.          0.        ]\n",
      "episode 7-step 25, taking action 2, observation [-0.1339446   1.2971028  -0.5660919  -0.37283975  0.27703795  0.34858215\n",
      "  0.          0.        ]\n",
      "episode 7-step 26, taking action 3, observation [-0.1393755   1.2882218  -0.55843234 -0.3977008   0.29280388  0.3153186\n",
      "  0.          0.        ]\n",
      "episode 7-step 27, taking action 2, observation [-0.14486226  1.279521   -0.5643639  -0.38993713  0.3090171   0.3242639\n",
      "  0.          0.        ]\n",
      "episode 7-step 28, taking action 3, observation [-0.15026999  1.2702587  -0.55432177 -0.4146048   0.323084    0.28133783\n",
      "  0.          0.        ]\n",
      "episode 7-step 29, taking action 1, observation [-0.15573874  1.2603699  -0.5619301  -0.44294393  0.33879575  0.31423515\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 7-step 30, taking action 3, observation [-0.16115074  1.249908   -0.55470645 -0.4682178   0.3529676   0.2834365\n",
      "  0.          0.        ]\n",
      "episode 7-step 31, taking action 2, observation [-0.16696587  1.2400823  -0.59454155 -0.4399584   0.36670673  0.2747829\n",
      "  0.          0.        ]\n",
      "episode 7-step 32, taking action 3, observation [-0.17271404  1.2297025  -0.5859623  -0.46422642  0.3785163   0.23619139\n",
      "  0.          0.        ]\n",
      "episode 7-step 33, taking action 0, observation [-0.1784627   1.2187243  -0.5859559  -0.49090397  0.39032578  0.2361891\n",
      "  0.          0.        ]\n",
      "episode 7-step 34, taking action 3, observation [-0.18415394  1.2071872  -0.5785969  -0.51538557  0.40045768  0.20263728\n",
      "  0.          0.        ]\n",
      "episode 7-step 35, taking action 2, observation [-0.18999243  1.1957995  -0.593542   -0.50888383  0.4108625   0.20809639\n",
      "  0.          0.        ]\n",
      "episode 7-step 36, taking action 2, observation [-0.19607659  1.1851772  -0.61864793 -0.47511193  0.42190686  0.22088695\n",
      "  0.          0.        ]\n",
      "episode 7-step 37, taking action 3, observation [-0.20208903  1.1739991  -0.609546   -0.4993199   0.43090764  0.1800157\n",
      "  0.          0.        ]\n",
      "episode 7-step 38, taking action 3, observation [-0.20803776  1.1622617  -0.60146344 -0.52370256  0.43807718  0.1433909\n",
      "  0.          0.        ]\n",
      "episode 7-step 39, taking action 0, observation [-0.21398664  1.1499248  -0.60146075 -0.5503731   0.44524667  0.14339042\n",
      "  0.          0.        ]\n",
      "episode 7-step 40, taking action 1, observation [-0.22001687  1.1369311  -0.61174613 -0.5803036   0.45480603  0.19118735\n",
      "  0.          0.        ]\n",
      "episode 7-step 41, taking action 2, observation [-0.22634888  1.1237073  -0.64119637 -0.59036416  0.46361217  0.17612243\n",
      "  0.          0.        ]\n",
      "episode 7-step 42, taking action 0, observation [-0.23268123  1.1098844  -0.6411921  -0.6170367   0.47241822  0.1761215\n",
      "  0.          0.        ]\n",
      "episode 7-step 43, taking action 2, observation [-0.2390213   1.0960735  -0.6426759  -0.61680037  0.48205107  0.19265687\n",
      "  0.          0.        ]\n",
      "episode 7-step 44, taking action 3, observation [-0.24530621  1.0817194  -0.6355017  -0.64042217  0.48987693  0.15651706\n",
      "  0.          0.        ]\n",
      "episode 7-step 45, taking action 1, observation [-0.25165382  1.0667171  -0.6434361  -0.66988814  0.4996022   0.19450626\n",
      "  0.          0.        ]\n",
      "episode 7-step 46, taking action 0, observation [-0.25800186  1.0511156  -0.6434304  -0.69656175  0.5093275   0.19450496\n",
      "  0.          0.        ]\n",
      "episode 7-step 47, taking action 1, observation [-0.26441717  1.0348718  -0.65182024 -0.7258263   0.5209974   0.2333987\n",
      "  0.          0.        ]\n",
      "episode 7-step 48, taking action 3, observation [-0.2707696   1.0180876  -0.64369357 -0.74922633  0.53064096  0.19287051\n",
      "  0.          0.        ]\n",
      "episode 7-step 49, taking action 3, observation [-0.27706933  1.0007634  -0.63680583 -0.77265954  0.5384759   0.1566987\n",
      "  0.          0.        ]\n",
      "episode 7-step 50, taking action 1, observation [-0.28342646  0.9827992  -0.6439761  -0.80172884  0.54800844  0.19065131\n",
      "  0.          0.        ]\n",
      "episode 7-step 51, taking action 0, observation [-0.28978395  0.96423614 -0.64397013 -0.828402    0.55754095  0.19065009\n",
      "  0.          0.        ]\n",
      "episode 7-step 52, taking action 3, observation [-0.2960855   0.9451321  -0.6367219  -0.85182214  0.56519234  0.15302795\n",
      "  0.          0.        ]\n",
      "episode 7-step 53, taking action 1, observation [-0.30245638  0.92535985 -0.6455556  -0.8823772   0.57512426  0.19863841\n",
      "  0.          0.        ]\n",
      "episode 7-step 54, taking action 3, observation [-0.30876413  0.9050583  -0.63736063 -0.9051597   0.5828804   0.15512171\n",
      "  0.          0.        ]\n",
      "episode 7-step 55, taking action 3, observation [-0.315023    0.88420844 -0.6310336  -0.9289374   0.58897763  0.12194618\n",
      "  0.          0.        ]\n",
      "episode 7-step 56, taking action 2, observation [-0.3218147   0.86386245 -0.6841601  -0.9065069   0.59491587  0.11876489\n",
      "  0.          0.        ]\n",
      "episode 7-step 57, taking action 0, observation [-0.32860652  0.8429167  -0.6841577  -0.93317604  0.6008541   0.11876459\n",
      "  0.          0.        ]\n",
      "episode 7-step 58, taking action 3, observation [-0.33532262  0.8214525  -0.6744064  -0.9552532   0.6042012   0.06694244\n",
      "  0.          0.        ]\n",
      "episode 7-step 59, taking action 0, observation [-0.3420388   0.7993881  -0.6744056  -0.98192054  0.6075483   0.06694238\n",
      "  0.          0.        ]\n",
      "episode 7-step 60, taking action 2, observation [-0.349155    0.7778109  -0.7147273  -0.9604389   0.611289    0.07481481\n",
      "  0.          0.        ]\n",
      "episode 7-step 61, taking action 3, observation [-0.3562149   0.755696   -0.7074695  -0.9835761   0.61307424  0.03570487\n",
      "  0.          0.        ]\n",
      "episode 7-step 62, taking action 0, observation [-0.3632748   0.7329812  -0.7074693  -1.0102429   0.61485946  0.03570486\n",
      "  0.          0.        ]\n",
      "episode 7-step 63, taking action 2, observation [-0.37041697  0.7102663  -0.71631736 -1.0105449   0.6174008   0.05082697\n",
      "  0.          0.        ]\n",
      "episode 7-step 64, taking action 0, observation [-0.37755919  0.6869515  -0.7163169  -1.0372119   0.6199422   0.05082713\n",
      "  0.          0.        ]\n",
      "episode 7-step 65, taking action 2, observation [-0.3853764   0.6642434  -0.7834995  -1.0100968   0.62210375  0.04323169\n",
      "  0.          0.        ]\n",
      "episode 7-step 66, taking action 1, observation [-0.39325747  0.6408765  -0.7915861  -1.040207    0.626339    0.08470522\n",
      "  0.          0.        ]\n",
      "episode 7-step 67, taking action 2, observation [-0.40159696  0.61805433 -0.8377067  -1.0161428   0.63092506  0.09172048\n",
      "  0.          0.        ]\n",
      "episode 7-step 68, taking action 3, observation [-0.40986934  0.5947115  -0.8290262  -1.0383285   0.63310874  0.04367331\n",
      "  0.          0.        ]\n",
      "episode 7-step 69, taking action 1, observation [-0.41820174  0.5707093  -0.8366524  -1.0684438   0.63729405  0.08370666\n",
      "  0.          0.        ]\n",
      "episode 7-step 70, taking action 3, observation [-0.4264565   0.54617876 -0.8268243  -1.0909151   0.638955    0.03321927\n",
      "  0.          0.        ]\n",
      "episode 7-step 71, taking action 2, observation [-0.4353435   0.5221788  -0.8898285  -1.0672262   0.64034694  0.0278387\n",
      "  0.          0.        ]\n",
      "episode 7-step 72, taking action 2, observation [-0.44458047  0.49846345 -0.92509717 -1.0547113   0.64207006  0.03446201\n",
      "  0.          0.        ]\n",
      "episode 7-step 73, taking action 0, observation [-0.45381746  0.47414818 -0.92509687 -1.0813783   0.64379317  0.03446212\n",
      "  0.          0.        ]\n",
      "episode 7-step 74, taking action 3, observation [-4.6301061e-01  4.4929406e-01 -9.1933393e-01 -1.1046420e+00\n",
      "  6.4382130e-01  5.6291086e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 7-step 75, taking action 1, observation [-0.4722585   0.42376536 -0.9265114  -1.1354706   0.6459431   0.042436\n",
      "  0.          0.        ]\n",
      "episode 7-step 76, taking action 1, observation [-0.48156533  0.39758456 -0.93391484 -1.1652228   0.64994985  0.08013511\n",
      "  0.          0.        ]\n",
      "episode 7-step 77, taking action 0, observation [-0.4908722   0.37080404 -0.9339137  -1.1918906   0.65395653  0.08013418\n",
      "  0.          0.        ]\n",
      "episode 7-step 78, taking action 0, observation [-0.5001792   0.3434236  -0.93391246 -1.2185582   0.6579632   0.08013411\n",
      "  0.          0.        ]\n",
      "episode 7-step 79, taking action 1, observation [-0.50953346  0.31537443 -0.94013417 -1.2490687   0.663844    0.11761562\n",
      "  0.          0.        ]\n",
      "episode 7-step 80, taking action 1, observation [-0.5189445   0.28665248 -0.94747496 -1.2798747   0.67184603  0.16004127\n",
      "  0.          0.        ]\n",
      "episode 7-step 81, taking action 1, observation [-0.52840626  0.25726327 -0.9540184  -1.3103836   0.68177694  0.19861977\n",
      "  0.          0.        ]\n",
      "episode 7-step 82, taking action 3, observation [-0.53781044  0.2273462  -0.9465419  -1.3329791   0.689576    0.15598145\n",
      "  0.          0.        ]\n",
      "episode 7-step 83, taking action 0, observation [-0.54721504  0.19682981 -0.946537   -1.3596497   0.69737506  0.1559802\n",
      "  0.          0.        ]\n",
      "episode 7-step 84, taking action 1, observation [-0.55667055  0.16565968 -0.95294046 -1.3895011   0.706929    0.1910789\n",
      "  0.          0.        ]\n",
      "episode 7-step 85, taking action 2, observation [-0.5666775   0.1347682  -1.0078884  -1.3770862   0.7163026   0.18747054\n",
      "  0.          0.        ]\n",
      "episode 7-step 86, taking action 1, observation [-0.57673895  0.10320197 -1.0148947  -1.4080733   0.72780555  0.23005947\n",
      "  0.          0.        ]\n",
      "episode 7-step 87, taking action 1, observation [-0.58686566  0.07095846 -1.02311    -1.4393226   0.74168736  0.27763626\n",
      "  0.          0.        ]\n",
      "episode 7-step 88, taking action 2, observation [-0.5972408   0.03862203 -1.0479778  -1.443647    0.75577754  0.2818041\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 7-step 89, taking action 2, observation [-0.60782975  0.00614466 -1.0694032  -1.4501259   0.77011853  0.28681996\n",
      "  0.          0.        ]\n",
      "episode 7-step 90, taking action 3, observation [-0.61836433 -0.02684289 -1.062122   -1.4717753   0.7820926   0.23948078\n",
      "  0.          0.        ]\n",
      "episode 7-step 91, taking action 1, observation [-0.6289526  -0.06049918 -1.0688429  -1.5025527   0.7961146   0.28044122\n",
      "  0.          0.        ]\n",
      "episode 7-step 92, taking action 1, observation [-0.6395853  -0.09483142 -1.0745364  -1.5336629   0.81213295  0.32036602\n",
      "  0.          0.        ]\n",
      "episode 7-step 93, taking action 3, observation [-0.65015924 -0.12965071 -1.0665257  -1.5540217   0.82531726  0.26368585\n",
      "  0.          1.        ]\n",
      "episode 7-step 94, taking action 0, observation [-0.6608685  -0.16326691 -1.0733687  -1.4966471   0.8241251  -0.03096528\n",
      "  0.          1.        ]\n",
      "episode 7-step 95, taking action 1, observation [-0.67360884 -0.19344245 -1.3948913  -0.93650746  0.74201953 -3.7767594\n",
      "  0.          1.        ]\n",
      "episode 7-step 96, taking action 2, observation [-0.689267   -0.21626844 -1.4252764  -0.94155705  0.5643704  -3.5551808\n",
      "  1.          1.        ]\n",
      "episode 7-step 97, taking action 1, observation [-0.70503587 -0.23882753 -1.4370184  -0.95323175  0.40930772 -3.0995836\n",
      "  1.          0.        ]\n",
      "episode 7-step 98, taking action 0, observation [-7.2121644e-01 -2.5223571e-01 -1.6795437e+00 -5.1084542e-01\n",
      "  4.2933300e-01 -2.7999050e-08  1.0000000e+00  0.0000000e+00]\n",
      "episode 7-step 99, taking action 0, observation [-7.3811942e-01 -2.6379192e-01 -1.6902218e+00 -5.1409322e-01\n",
      "  4.2909491e-01  1.7979218e-07  0.0000000e+00  1.0000000e+00]\n",
      "episode 8-step 0, taking action 1, observation [-0.0083415   1.4271153  -0.42780954  0.34678316  0.01146942  0.13352802\n",
      "  0.          0.        ]\n",
      "episode 8-step 1, taking action 1, observation [-0.0126339   1.4343282  -0.43807286  0.32048243  0.02019359  0.17450027\n",
      "  0.          0.        ]\n",
      "episode 8-step 2, taking action 0, observation [-0.01692667  1.4409422  -0.43809992  0.29380608  0.02891607  0.1744657\n",
      "  0.          0.        ]\n",
      "episode 8-step 3, taking action 0, observation [-0.02121973  1.4469571  -0.43812567  0.26713178  0.0376376   0.17444679\n",
      "  0.          0.        ]\n",
      "episode 8-step 4, taking action 1, observation [-0.02560597  1.4523604  -0.4498027   0.23982911  0.04870144  0.22129743\n",
      "  0.          0.        ]\n",
      "episode 8-step 5, taking action 0, observation [-0.02999258  1.4571652  -0.44983593  0.21315199  0.05976246  0.22124061\n",
      "  0.          0.        ]\n",
      "episode 8-step 6, taking action 1, observation [-0.034447    1.4613639  -0.45831227  0.1860404   0.07252223  0.25521895\n",
      "  0.          0.        ]\n",
      "episode 8-step 7, taking action 0, observation [-0.03890181  1.4649646  -0.45834976  0.1593597   0.08527915  0.25516206\n",
      "  0.          0.        ]\n",
      "episode 8-step 8, taking action 1, observation [-0.04345036  1.4679573  -0.47008738  0.13206169  0.10039166  0.3022782\n",
      "  0.          0.        ]\n",
      "episode 8-step 9, taking action 2, observation [-0.04804754  1.4712293  -0.47496623  0.14432964  0.11555228  0.30324048\n",
      "  0.          0.        ]\n",
      "episode 8-step 10, taking action 1, observation [-0.05271654  1.4738858  -0.4839521   0.1166612   0.13253453  0.339676\n",
      "  0.          0.        ]\n",
      "episode 8-step 11, taking action 2, observation [-0.05756082  1.4768953  -0.5010127   0.1322023   0.14909111  0.33116204\n",
      "  0.          0.        ]\n",
      "episode 8-step 12, taking action 3, observation [-0.06230936  1.4793319  -0.48892674  0.10680971  0.16316845  0.28157234\n",
      "  0.          0.        ]\n",
      "episode 8-step 13, taking action 3, observation [-0.0669838   1.4811771  -0.4796041   0.08062988  0.1753628   0.24390943\n",
      "  0.          0.        ]\n",
      "episode 8-step 14, taking action 1, observation [-0.07174321  1.4823945  -0.4902665   0.05234433  0.1897593   0.28795627\n",
      "  0.          0.        ]\n",
      "episode 8-step 15, taking action 1, observation [-0.07656431  1.4830074  -0.49791664  0.02514313  0.20568655  0.31857437\n",
      "  0.          0.        ]\n",
      "episode 8-step 16, taking action 0, observation [-0.08138647  1.4830235  -0.49795657 -0.00155052  0.22161098  0.3185174\n",
      "  0.          0.        ]\n",
      "episode 8-step 17, taking action 1, observation [-0.08627939  1.4824216  -0.5067695  -0.02947627  0.23935087  0.35482982\n",
      "  0.          0.        ]\n",
      "episode 8-step 18, taking action 1, observation [-0.09126291  1.4811822  -0.51810235 -0.05842008  0.25948644  0.40274787\n",
      "  0.          0.        ]\n",
      "episode 8-step 19, taking action 0, observation [-0.09624805  1.4793478  -0.518147   -0.08512918  0.27961782  0.4026641\n",
      "  0.          0.        ]\n",
      "episode 8-step 20, taking action 2, observation [-0.1013051   1.4779128  -0.52574193 -0.06774978  0.3002831   0.4133435\n",
      "  0.          0.        ]\n",
      "episode 8-step 21, taking action 2, observation [-0.10649242  1.4772013  -0.5392556  -0.03599735  0.3215815   0.42600632\n",
      "  0.          0.        ]\n",
      "episode 8-step 22, taking action 0, observation [-0.11168156  1.4758953  -0.53929293 -0.06271986  0.3428758   0.42592412\n",
      "  0.          0.        ]\n",
      "episode 8-step 23, taking action 0, observation [-0.11687279  1.4739945  -0.53933287 -0.08943252  0.36416665  0.42585516\n",
      "  0.          0.        ]\n",
      "episode 8-step 24, taking action 1, observation [-0.12213745  1.471429   -0.54860914 -0.1198258   0.38765547  0.4697762\n",
      "  0.          0.        ]\n",
      "episode 8-step 25, taking action 3, observation [-0.12732191  1.4683173  -0.53824383 -0.14383568  0.40884084  0.42370787\n",
      "  0.          0.        ]\n",
      "episode 8-step 26, taking action 3, observation [-0.13242368  1.4646729  -0.52749085 -0.16708203  0.42753446  0.37387294\n",
      "  0.          0.        ]\n",
      "episode 8-step 27, taking action 1, observation [-0.13760643  1.4603662  -0.5376597  -0.1974504   0.44866014  0.42251334\n",
      "  0.          0.        ]\n",
      "episode 8-step 28, taking action 2, observation [-0.1432291   1.4567039  -0.58130884 -0.1690332   0.46961522  0.4191018\n",
      "  0.          0.        ]\n",
      "episode 8-step 29, taking action 3, observation [-0.14879532  1.4524978  -0.57383096 -0.19288038  0.48875073  0.38270992\n",
      "  0.          0.        ]\n",
      "episode 8-step 30, taking action 1, observation [-0.1544425   1.4476345  -0.5838736  -0.22310528  0.5102915   0.43081555\n",
      "  0.          0.        ]\n",
      "episode 8-step 31, taking action 2, observation [-0.1605154   1.4434431  -0.6263975  -0.1935747   0.5320284   0.43473703\n",
      "  0.          0.        ]\n",
      "episode 8-step 32, taking action 2, observation [-0.16706848  1.4393765  -0.6734935  -0.1880245   0.5529841   0.41911474\n",
      "  0.          0.        ]\n",
      "episode 8-step 33, taking action 1, observation [-0.17368488  1.4346592  -0.6812756  -0.21792363  0.5759053   0.45842472\n",
      "  0.          0.        ]\n",
      "episode 8-step 34, taking action 3, observation [-0.18022776  1.4294171  -0.6715774  -0.24061522  0.5963775   0.40944394\n",
      "  0.          0.        ]\n",
      "episode 8-step 35, taking action 2, observation [-0.18696156  1.4243133  -0.69087714 -0.23491997  0.617397    0.42038995\n",
      "  0.          0.        ]\n",
      "episode 8-step 36, taking action 2, observation [-0.1941084   1.4191782  -0.7314864  -0.236333    0.6378728   0.40951625\n",
      "  0.          0.        ]\n",
      "episode 8-step 37, taking action 2, observation [-0.2015171   1.4140115  -0.7574818  -0.23800103  0.65841794  0.41090244\n",
      "  0.          0.        ]\n",
      "episode 8-step 38, taking action 3, observation [-0.20887895  1.4083202  -0.75099605 -0.26071352  0.67701936  0.37202886\n",
      "  0.          0.        ]\n",
      "episode 8-step 39, taking action 3, observation [-0.21619415  1.4020836  -0.74481064 -0.28441963  0.6939534   0.33868137\n",
      "  0.          0.        ]\n",
      "episode 8-step 40, taking action 3, observation [-0.22346321  1.3953099  -0.73863864 -0.30766255  0.70910895  0.30311087\n",
      "  0.          0.        ]\n",
      "episode 8-step 41, taking action 1, observation [-0.23080477  1.387841   -0.74784154 -0.339908    0.72703433  0.35850805\n",
      "  0.          0.        ]\n",
      "episode 8-step 42, taking action 1, observation [-0.23820157  1.3796971  -0.7547577  -0.3710486   0.7471163   0.40163988\n",
      "  0.          0.        ]\n",
      "episode 8-step 43, taking action 3, observation [-0.24554057  1.3710377  -0.74699557 -0.39308462  0.76486766  0.35502732\n",
      "  0.          0.        ]\n",
      "episode 8-step 44, taking action 1, observation [-0.25294724  1.3617003  -0.7552942  -0.4245248   0.7850644   0.40393466\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 8-step 45, taking action 3, observation [-0.26030207  1.3518357  -0.74837166 -0.44716176  0.8032032   0.36277676\n",
      "  0.          0.        ]\n",
      "episode 8-step 46, taking action 2, observation [-0.2681687   1.3423741  -0.79971635 -0.42968765  0.821923    0.37439612\n",
      "  0.          0.        ]\n",
      "episode 8-step 47, taking action 0, observation [-0.2760376   1.3323156  -0.79968435 -0.45637453  0.8406424   0.37438706\n",
      "  0.          0.        ]\n",
      "episode 8-step 48, taking action 3, observation [-0.28386316  1.3217291  -0.79381806 -0.47902462  0.85744673  0.3360864\n",
      "  0.          0.        ]\n",
      "episode 8-step 49, taking action 2, observation [-0.29249483  1.3113097  -0.8737869  -0.47139022  0.8735987   0.3230394\n",
      "  0.          0.        ]\n",
      "episode 8-step 50, taking action 1, observation [-0.30117497  1.3002161  -0.8797431  -0.5025418   0.8918134   0.3642941\n",
      "  0.          0.        ]\n",
      "episode 8-step 51, taking action 3, observation [-0.30981296  1.2885903  -0.8741236  -0.52536184  0.9081901   0.32753295\n",
      "  0.          0.        ]\n",
      "episode 8-step 52, taking action 0, observation [-0.31845284  1.2763666  -0.87409717 -0.5520423   0.9245664   0.32752687\n",
      "  0.          0.        ]\n",
      "episode 8-step 53, taking action 2, observation [-0.32737532  1.263988   -0.9023224  -0.5592001   0.94125116  0.33369452\n",
      "  0.          0.        ]\n",
      "episode 8-step 54, taking action 3, observation [-0.33625603  1.2511189  -0.8964058  -0.57970965  0.9553576   0.28212893\n",
      "  0.          0.        ]\n",
      "episode 8-step 55, taking action 2, observation [-0.34561467  1.2378396  -0.9435798  -0.5975577   0.9686661   0.2661697\n",
      "  0.          0.        ]\n",
      "episode 8-step 56, taking action 2, observation [-0.3556601   1.2247939  -1.0122879  -0.5874299   0.9822926   0.27253076\n",
      "  0.          0.        ]\n",
      "episode 8-step 57, taking action 3, observation [-0.36565787  1.21125    -1.0059545  -0.60818577  0.99335843  0.22131702\n",
      "  0.          0.        ]\n",
      "episode 8-step 58, taking action 2, observation [-0.3764637   1.1980839  -1.0869217  -0.59172046  1.0049093   0.23101743\n",
      "  0.          0.        ]\n",
      "episode 8-step 59, taking action 0, observation [-0.3872705   1.1843188  -1.0869076  -0.6183932   1.0164601   0.23101532\n",
      "  0.          0.        ]\n",
      "episode 8-step 60, taking action 1, observation [-0.39812002  1.1698592  -1.0923005  -0.6506758   1.0303813   0.27842432\n",
      "  0.          0.        ]\n",
      "episode 8-step 61, taking action 3, observation [-0.40894136  1.1548746  -1.0884008  -0.6730344   1.0425054   0.2424819\n",
      "  0.          0.        ]\n",
      "episode 8-step 62, taking action 0, observation [-0.41976386  1.1392908  -1.0883849  -0.6997073   1.0546293   0.24247944\n",
      "  0.          0.        ]\n",
      "episode 8-step 63, taking action 0, observation [-0.43058744  1.123108   -1.0883689  -0.72638017  1.0667531   0.24247701\n",
      "  0.          0.        ]\n",
      "episode 8-step 64, taking action 1, observation [-0.4414391   1.1062396  -1.0920112  -0.75810784  1.0809019   0.28297445\n",
      "  0.          0.        ]\n",
      "episode 8-step 65, taking action 2, observation [-0.45301843  1.0890641  -1.1642314  -0.7713419   1.0942637   0.26723576\n",
      "  0.          0.        ]\n",
      "episode 8-step 66, taking action 3, observation [-0.46456593  1.0713643  -1.1600333  -0.7935802   1.1057541   0.22981009\n",
      "  0.          0.        ]\n",
      "episode 8-step 67, taking action 1, observation [-0.47614947  1.0529399  -1.1648085  -0.8275889   1.1201504   0.28792706\n",
      "  0.          0.        ]\n",
      "episode 8-step 68, taking action 0, observation [-0.48773465  1.0339164  -1.1647851  -0.85426325  1.1345466   0.28792292\n",
      "  0.          0.        ]\n",
      "episode 8-step 69, taking action 1, observation [-0.49934402  1.0141983  -1.1679208  -0.8865347   1.1511252   0.33156994\n",
      "  0.          0.        ]\n",
      "episode 8-step 70, taking action 3, observation [-0.5109279  0.9939636 -1.1643376 -0.9083634  1.1657428  0.2923524\n",
      "  0.         0.       ]\n",
      "episode 8-step 71, taking action 2, observation [-0.5232552   0.9738256  -1.2387192  -0.90450126  1.1809796   0.3047374\n",
      "  0.          0.        ]\n",
      "episode 8-step 72, taking action 1, observation [-0.53561723  0.95298415 -1.2428869  -0.9374031   1.1987095   0.3545985\n",
      "  0.          0.        ]\n",
      "episode 8-step 73, taking action 0, observation [-0.54798174  0.93154407 -1.2428502  -0.96407956  1.216439    0.35459083\n",
      "  0.          0.        ]\n",
      "episode 8-step 74, taking action 3, observation [-0.56031287  0.9096152  -1.2383153  -0.98419684  1.2315344   0.30190724\n",
      "  0.          0.        ]\n",
      "episode 8-step 75, taking action 1, observation [-0.57266563  0.8869745  -1.2410948  -1.0175291   1.2491869   0.3530499\n",
      "  0.          0.        ]\n",
      "episode 8-step 76, taking action 0, observation [-0.5850211   0.86373514 -1.2410576  -1.0442042   1.266839    0.3530423\n",
      "  0.          0.        ]\n",
      "episode 8-step 77, taking action 3, observation [-0.5973591  0.8400232 -1.238199  -1.0634098  1.2816317  0.2958538\n",
      "  0.         0.       ]\n",
      "episode 8-step 78, taking action 1, observation [-0.60971934  0.8155835  -1.2410054  -1.0977038   1.2993432   0.35423136\n",
      "  0.          0.        ]\n",
      "episode 8-step 79, taking action 3, observation [-0.6220651   0.7906301  -1.2387435  -1.1193041   1.3150903   0.31494245\n",
      "  0.          0.        ]\n",
      "episode 8-step 80, taking action 1, observation [-0.6344353   0.76497215 -1.2415315  -1.1522593   1.333274    0.3636731\n",
      "  0.          0.        ]\n",
      "episode 8-step 81, taking action 1, observation [-0.6468328  0.7386107 -1.2444774 -1.1851883  1.3538922  0.4123648\n",
      "  0.         0.       ]\n",
      "episode 8-step 82, taking action 3, observation [-0.6592145   0.7117583  -1.2419763  -1.2054305   1.3720337   0.36283046\n",
      "  0.          0.        ]\n",
      "episode 8-step 83, taking action 3, observation [-0.6715758   0.68442696 -1.2390974  -1.2249085   1.3874012   0.30734968\n",
      "  0.          0.        ]\n",
      "episode 8-step 84, taking action 2, observation [-0.6847194   0.65674084 -1.31711    -1.2407595   1.4028516   0.30900827\n",
      "  0.          0.        ]\n",
      "episode 8-step 85, taking action 2, observation [-0.698833   0.6288663 -1.4139464 -1.2495043  1.4188018  0.3190036\n",
      "  0.         0.       ]\n",
      "episode 8-step 86, taking action 2, observation [-0.71331257  0.6003592  -1.4502397  -1.2774059   1.434396    0.31188414\n",
      "  0.          0.        ]\n",
      "episode 8-step 87, taking action 3, observation [-0.7277768  0.5713798 -1.4481018 -1.2964771  1.4470919  0.2539192\n",
      "  0.         0.       ]\n",
      "episode 8-step 88, taking action 0, observation [-0.7422425   0.54180056 -1.4480816  -1.3231455   1.4597877   0.25391635\n",
      "  0.          0.        ]\n",
      "episode 8-step 89, taking action 1, observation [-0.75672585  0.51150817 -1.4499426  -1.3565953   1.4750683   0.30561215\n",
      "  0.          0.        ]\n",
      "episode 8-step 90, taking action 0, observation [-0.77121127  0.48061603 -1.4499134  -1.383264    1.4903487   0.30560723\n",
      "  0.          0.        ]\n",
      "episode 8-step 91, taking action 3, observation [-0.7856877   0.4492535  -1.448582   -1.4022083   1.5027025   0.24707642\n",
      "  0.          0.        ]\n",
      "episode 8-step 92, taking action 0, observation [-0.80016553  0.41729122 -1.4485629  -1.4288758   1.5150561   0.24707381\n",
      "  0.          0.        ]\n",
      "episode 8-step 93, taking action 0, observation [-0.81464463  0.384729   -1.4485438  -1.4555434   1.5274097   0.24707124\n",
      "  0.          0.        ]\n",
      "episode 8-step 94, taking action 3, observation [-0.8291162   0.35170084 -1.4475293  -1.4742165   1.5367407   0.186619\n",
      "  0.          0.        ]\n",
      "episode 8-step 95, taking action 2, observation [-0.8441454   0.31830734 -1.503217   -1.4908828   1.5467067   0.19932003\n",
      "  0.          0.        ]\n",
      "episode 8-step 96, taking action 2, observation [-0.85994685  0.28452766 -1.5803486  -1.5084366   1.5572451   0.2107683\n",
      "  0.          0.        ]\n",
      "episode 8-step 97, taking action 1, observation [-0.87574464  0.25002044 -1.5798811  -1.5427076   1.5706515   0.2681272\n",
      "  0.          0.        ]\n",
      "episode 8-step 98, taking action 3, observation [-0.8915449   0.21504623 -1.579976   -1.561445    1.5810654   0.2082781\n",
      "  0.          0.        ]\n",
      "episode 8-step 99, taking action 1, observation [-0.90734565  0.17937091 -1.5798723  -1.5941374   1.5937532   0.25375634\n",
      "  0.          0.        ]\n",
      "episode 9-step 0, taking action 0, observation [-0.0094079   1.3990071  -0.47580808 -0.27758262  0.01079008  0.10666776\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 9-step 1, taking action 1, observation [-0.01418371  1.3921586  -0.48479137 -0.30444375  0.01791866  0.14258508\n",
      "  0.          0.        ]\n",
      "episode 9-step 2, taking action 0, observation [-0.01895971  1.3847108  -0.48481378 -0.3311187   0.02504549  0.14254992\n",
      "  0.          0.        ]\n",
      "episode 9-step 3, taking action 1, observation [-0.02381372  1.3766519  -0.494595   -0.35835627  0.03413366  0.18178006\n",
      "  0.          0.        ]\n",
      "episode 9-step 4, taking action 3, observation [-0.02859268  1.3680024  -0.48515955 -0.3845978   0.04131757  0.14369161\n",
      "  0.          0.        ]\n",
      "episode 9-step 5, taking action 0, observation [-0.03337193  1.3587539  -0.48517933 -0.41127005  0.04850161  0.143694\n",
      "  0.          0.        ]\n",
      "episode 9-step 6, taking action 0, observation [-0.03815136  1.3489058  -0.48519978 -0.43794242  0.05568413  0.14366338\n",
      "  0.          0.        ]\n",
      "episode 9-step 7, taking action 2, observation [-0.04313106  1.3399096  -0.5045205  -0.40009177  0.06217114  0.12975204\n",
      "  0.          0.        ]\n",
      "episode 9-step 8, taking action 1, observation [-0.04818649  1.3303133  -0.51399755 -0.42686844  0.07055255  0.16764358\n",
      "  0.          0.        ]\n",
      "episode 9-step 9, taking action 0, observation [-0.05324211  1.3201181  -0.5140234  -0.45353696  0.07893153  0.16759495\n",
      "  0.          0.        ]\n",
      "episode 9-step 10, taking action 2, observation [-0.05830612  1.3100015  -0.51498044 -0.45010242  0.08744007  0.17018649\n",
      "  0.          0.        ]\n",
      "episode 9-step 11, taking action 1, observation [-0.06344853  1.299278   -0.524787   -0.4772469   0.09791589  0.20953555\n",
      "  0.          0.        ]\n",
      "episode 9-step 12, taking action 3, observation [-0.06850538  1.287978   -0.51401085 -0.502784    0.10619154  0.16552809\n",
      "  0.          0.        ]\n",
      "episode 9-step 13, taking action 2, observation [-0.07348251  1.2769606  -0.50671744 -0.49032134  0.11513448  0.17887498\n",
      "  0.          0.        ]\n",
      "episode 9-step 14, taking action 1, observation [-0.07852077  1.2653267  -0.514368   -0.51791173  0.12563278  0.20998546\n",
      "  0.          0.        ]\n",
      "episode 9-step 15, taking action 0, observation [-0.08355961  1.2530942  -0.51439786 -0.5445877   0.1361291   0.20994475\n",
      "  0.          0.        ]\n",
      "episode 9-step 16, taking action 1, observation [-0.08868103  1.2402447  -0.5247561  -0.57228565  0.14872737  0.25198805\n",
      "  0.          0.        ]\n",
      "episode 9-step 17, taking action 2, observation [-0.09373169  1.2273674  -0.51824605 -0.5737012   0.16191635  0.26380318\n",
      "  0.          0.        ]\n",
      "episode 9-step 18, taking action 1, observation [-0.09888019  1.2138742  -0.5304699  -0.60147184  0.17757945  0.31328994\n",
      "  0.          0.        ]\n",
      "episode 9-step 19, taking action 2, observation [-0.10418339  1.2003341  -0.54549205 -0.60367864  0.19283453  0.30512896\n",
      "  0.          0.        ]\n",
      "episode 9-step 20, taking action 1, observation [-0.10955916  1.1861787  -0.5545363  -0.63142097  0.20993501  0.34204\n",
      "  0.          0.        ]\n",
      "episode 9-step 21, taking action 1, observation [-0.1150115   1.1714025  -0.56404907 -0.6595027   0.22899672  0.38126832\n",
      "  0.          0.        ]\n",
      "episode 9-step 22, taking action 3, observation [-0.12036953  1.156067   -0.55207384 -0.684194    0.2455372   0.33083928\n",
      "  0.          0.        ]\n",
      "episode 9-step 23, taking action 2, observation [-0.12589464  1.1407325  -0.56842107 -0.68427277  0.2617786   0.32485706\n",
      "  0.          0.        ]\n",
      "episode 9-step 24, taking action 2, observation [-0.13140145  1.1253304  -0.56702405 -0.6875498   0.2785266   0.33495975\n",
      "  0.          0.        ]\n",
      "episode 9-step 25, taking action 2, observation [-0.13724442  1.1105514  -0.6001331  -0.65995044  0.29481477  0.3257635\n",
      "  0.          0.        ]\n",
      "episode 9-step 26, taking action 3, observation [-0.14300832  1.0952104  -0.5900676  -0.68465346  0.30895776  0.28286016\n",
      "  0.          0.        ]\n",
      "episode 9-step 27, taking action 0, observation [-0.14877272  1.079272   -0.5900601  -0.7113361   0.32310054  0.28285617\n",
      "  0.          0.        ]\n",
      "episode 9-step 28, taking action 0, observation [-0.15453759  1.0627363  -0.59005225 -0.7380187   0.33724314  0.28285217\n",
      "  0.          0.        ]\n",
      "episode 9-step 29, taking action 3, observation [-0.16023645  1.045646   -0.58156157 -0.76234305  0.34948748  0.24488714\n",
      "  0.          0.        ]\n",
      "episode 9-step 30, taking action 3, observation [-0.16586152  1.027994   -0.5722104  -0.78692085  0.3597018   0.20428653\n",
      "  0.          0.        ]\n",
      "episode 9-step 31, taking action 1, observation [-0.17154436  1.0097145  -0.57944065 -0.81526643  0.37149435  0.23585062\n",
      "  0.          0.        ]\n",
      "episode 9-step 32, taking action 1, observation [-0.17730026  0.99080414 -0.5885446  -0.8438895   0.38525334  0.2751795\n",
      "  0.          0.        ]\n",
      "episode 9-step 33, taking action 0, observation [-0.18305674  0.971296   -0.5885358  -0.87057096  0.39901215  0.27517593\n",
      "  0.          0.        ]\n",
      "episode 9-step 34, taking action 1, observation [-0.18888502  0.9511401  -0.5975601  -0.9000479   0.41484824  0.31672183\n",
      "  0.          0.        ]\n",
      "episode 9-step 35, taking action 0, observation [-0.19471398  0.9303871  -0.59754765 -0.9267338   0.43068406  0.31671628\n",
      "  0.          0.        ]\n",
      "episode 9-step 36, taking action 1, observation [-0.20061502  0.9089948  -0.6064749  -0.9558859   0.4485353   0.35702488\n",
      "  0.          0.        ]\n",
      "episode 9-step 37, taking action 0, observation [-0.20651722  0.8870064  -0.6064578  -0.9825766   0.46638614  0.35701704\n",
      "  0.          0.        ]\n",
      "episode 9-step 38, taking action 3, observation [-0.21233425  0.864478   -0.5955365  -1.0059853   0.48172897  0.30685663\n",
      "  0.          0.        ]\n",
      "episode 9-step 39, taking action 0, observation [-0.21815224  0.84135234 -0.59552294 -1.0326694   0.49707156  0.30685166\n",
      "  0.          0.        ]\n",
      "episode 9-step 40, taking action 1, observation [-0.22405085  0.8175613  -0.6056658  -1.0631994   0.5148947   0.35646275\n",
      "  0.          0.        ]\n",
      "episode 9-step 41, taking action 3, observation [-0.22987004  0.79324764 -0.5953069  -1.0857413   0.5301434   0.30497456\n",
      "  0.          0.        ]\n",
      "episode 9-step 42, taking action 1, observation [-0.23576593  0.76826376 -0.60499    -1.1165217   0.54785013  0.35413557\n",
      "  0.          0.        ]\n",
      "episode 9-step 43, taking action 3, observation [-0.2416111   0.7427406  -0.59823114 -1.1400298   0.5637783   0.3185636\n",
      "  0.          0.        ]\n",
      "episode 9-step 44, taking action 2, observation [-0.24766326  0.7170502  -0.61871505 -1.1475757   0.57960945  0.3166238\n",
      "  0.          0.        ]\n",
      "episode 9-step 45, taking action 3, observation [-0.2536502   0.69082034 -0.61027753 -1.1708968   0.59333384  0.27448928\n",
      "  0.          0.        ]\n",
      "episode 9-step 46, taking action 0, observation [-0.25963798  0.66399264 -0.6102644  -1.1975764   0.6070581   0.27448574\n",
      "  0.          0.        ]\n",
      "episode 9-step 47, taking action 0, observation [-0.26562676  0.63656676 -0.61025107 -1.224256    0.62078214  0.2744822\n",
      "  0.          0.        ]\n",
      "episode 9-step 48, taking action 2, observation [-0.27198783  0.60975295 -0.64799213 -1.1974691   0.6352789   0.28993514\n",
      "  0.          0.        ]\n",
      "episode 9-step 49, taking action 0, observation [-0.27834997  0.5823415  -0.6479765  -1.2241498   0.64977545  0.28993094\n",
      "  0.          0.        ]\n",
      "episode 9-step 50, taking action 0, observation [-0.28471327  0.5543322  -0.64796066 -1.2508304   0.6642718   0.2899267\n",
      "  0.          0.        ]\n",
      "episode 9-step 51, taking action 1, observation [-0.29114065  0.5256669  -0.6558634  -1.2809685   0.68082106  0.3309862\n",
      "  0.          0.        ]\n",
      "episode 9-step 52, taking action 3, observation [-0.29751438  0.49646574 -0.6488121  -1.3040906   0.6954294   0.29216653\n",
      "  0.          0.        ]\n",
      "episode 9-step 53, taking action 3, observation [-0.3038346   0.4667481  -0.6416113  -1.3261865   0.7078231   0.24787441\n",
      "  0.          0.        ]\n",
      "episode 9-step 54, taking action 3, observation [-0.3100844   0.43650985 -0.63256824 -1.3482896   0.7177201   0.1979402\n",
      "  0.          0.        ]\n",
      "episode 9-step 55, taking action 0, observation [-0.31633478  0.4056726  -0.63256013 -1.3749624   0.7276171   0.19793901\n",
      "  0.          0.        ]\n",
      "episode 9-step 56, taking action 1, observation [-0.32263756  0.3741541  -0.63937664 -1.4062862   0.73970324  0.24172275\n",
      "  0.          0.        ]\n",
      "episode 9-step 57, taking action 3, observation [-0.32886773  0.342125   -0.6300185  -1.4278109   0.7490959   0.18785322\n",
      "  0.          0.        ]\n",
      "episode 9-step 58, taking action 2, observation [-0.33532944  0.30997154 -0.653267   -1.4334812   0.7587028   0.19213843\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 9-step 59, taking action 3, observation [-0.34172186  0.27730203 -0.644415   -1.4552672   0.76575536  0.14105079\n",
      "  0.          0.        ]\n",
      "episode 9-step 60, taking action 3, observation [-0.34805313  0.2441203  -0.63646257 -1.4768916   0.77034     0.09169336\n",
      "  0.          0.        ]\n",
      "episode 9-step 61, taking action 3, observation [-0.35433692  0.21039914 -0.630409   -1.5000343   0.7731282   0.05576317\n",
      "  0.          0.        ]\n",
      "episode 9-step 62, taking action 1, observation [-0.36068934  0.17599268 -0.6391122  -1.5317057   0.7784833   0.1071007\n",
      "  0.          0.        ]\n",
      "episode 9-step 63, taking action 0, observation [-0.36704192  0.14098658 -0.6391097  -1.558374    0.7838383   0.1070997\n",
      "  0.          1.        ]\n",
      "episode 9-step 64, taking action 1, observation [-0.37334815  0.10602    -0.64116955 -1.5761056   0.8004378   0.25685543\n",
      "  0.          1.        ]\n",
      "episode 9-step 65, taking action 1, observation [-0.37715107  0.09933659 -0.48013467 -0.28246477  1.0528122   5.38699\n",
      "  0.          1.        ]\n",
      "episode 9-step 66, taking action 2, observation [-0.38190895  0.09722501 -0.55475056 -0.3101756   1.3061997   4.8706703\n",
      "  0.          1.        ]\n",
      "episode 9-step 67, taking action 2, observation [-0.38838044  0.09536599 -0.6375364  -0.30815983  1.5693443   4.856793\n",
      "  0.          1.        ]\n",
      "episode 9-step 68, taking action 1, observation [-0.39437252  0.09749137 -0.5068051   0.21650825  1.7370536   1.5965326\n",
      "  0.          1.        ]\n",
      "episode 9-step 69, taking action 3, observation [-0.39966053  0.10436627 -0.51119167  0.22675468  1.8380607   1.7734101\n",
      "  0.          0.        ]\n",
      "episode 9-step 70, taking action 1, observation [-0.40502244  0.11015662 -0.5097744   0.19663306  1.9304773   1.8147428\n",
      "  0.          1.        ]\n",
      "episode 9-step 71, taking action 2, observation [-0.41091436  0.11516327 -0.55519307  0.16413261  2.0226204   1.810758\n",
      "  0.          0.        ]\n",
      "episode 9-step 72, taking action 2, observation [-0.41749772  0.11920389 -0.6148745   0.12517203  2.1137874   1.818758\n",
      "  0.          0.        ]\n",
      "episode 9-step 73, taking action 0, observation [-0.42414474  0.12257825 -0.61396825  0.09877749  2.2047071   1.817724\n",
      "  0.          0.        ]\n",
      "episode 9-step 74, taking action 0, observation [-0.43085003  0.12528493 -0.61307985  0.07242491  2.29555     1.8167002\n",
      "  0.          0.        ]\n",
      "episode 9-step 75, taking action 0, observation [-0.43760905  0.12731667 -0.6122271   0.04612451  2.386336    1.8156449\n",
      "  0.          0.        ]\n",
      "episode 9-step 76, taking action 1, observation [-0.44434667  0.12856515 -0.60253537  0.01436924  2.4793565   1.8602638\n",
      "  0.          0.        ]\n",
      "episode 9-step 77, taking action 2, observation [-0.45151144  0.12880905 -0.6392072  -0.02596618  2.573108    1.8748882\n",
      "  0.          0.        ]\n",
      "episode 9-step 78, taking action 1, observation [-0.45865965  0.1282995  -0.6310913  -0.05503653  2.6686783   1.9112753\n",
      "  0.          0.        ]\n",
      "episode 9-step 79, taking action 3, observation [-0.46592268  0.12714165 -0.6405846  -0.07776121  2.7618346   1.8629879\n",
      "  0.          0.        ]\n",
      "episode 9-step 80, taking action 1, observation [-0.47314534  0.12522835 -0.63137424 -0.1062846   2.8569198   1.90157\n",
      "  0.          0.        ]\n",
      "episode 9-step 81, taking action 1, observation [-0.48032647  0.12256008 -0.62290704 -0.13421974  2.9537153   1.9357792\n",
      "  0.          0.        ]\n",
      "episode 9-step 82, taking action 3, observation [-0.48760962  0.11919338 -0.6336177  -0.15889925  3.048204    1.8896389\n",
      "  0.          0.        ]\n",
      "episode 9-step 83, taking action 0, observation [-0.49489957  0.11510883 -0.63346666 -0.18479964  3.1426346   1.8884833\n",
      "  0.          0.        ]\n",
      "episode 9-step 84, taking action 2, observation [-0.5015491   0.11088146 -0.5542475   0.00945608  3.20385    -0.05970526\n",
      "  0.          0.        ]\n",
      "episode 9-step 85, taking action 0, observation [-0.5071659   0.11130661 -0.5592207   0.01605026  3.203857   -0.0129346\n",
      "  0.          0.        ]\n",
      "episode 9-step 86, taking action 2, observation [-0.5125929   0.11089018 -0.5559801  -0.01921862  3.1906078  -0.26824984\n",
      "  0.          0.        ]\n",
      "episode 9-step 87, taking action 1, observation [-0.51787126  0.11023965 -0.5446955  -0.02938292  3.1739364  -0.3339552\n",
      "  0.          0.        ]\n",
      "episode 9-step 88, taking action 3, observation [-0.52316153  0.10923672 -0.55115706 -0.04491521  3.1520605  -0.43759885\n",
      "  0.          0.        ]\n",
      "episode 9-step 89, taking action 1, observation [-0.5282777   0.1080009  -0.5365131  -0.05491741  3.1274674  -0.49188647\n",
      "  0.          0.        ]\n",
      "episode 9-step 90, taking action 2, observation [-0.53333235  0.10573111 -0.5449537  -0.1000572   3.0884373  -0.7807211\n",
      "  0.          0.        ]\n",
      "episode 9-step 91, taking action 1, observation [-5.3831500e-01  1.0614403e-01 -4.9739629e-01  1.5860436e-02\n",
      "  3.0894926e+00  6.1910399e-08  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 92, taking action 1, observation [-5.4314834e-01  1.0653514e-01 -4.8264137e-01  1.5389934e-02\n",
      "  3.0903370e+00 -2.7908684e-09  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 93, taking action 2, observation [-5.4789090e-01  1.0691079e-01 -4.7370157e-01  1.5104870e-02\n",
      "  3.0910132e+00  6.4028427e-09  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 94, taking action 1, observation [-5.5249363e-01  1.0726935e-01 -4.5982575e-01  1.4662416e-02\n",
      "  3.0915546e+00  1.5654084e-08  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 95, taking action 2, observation [-5.5691612e-01  1.0761696e-01 -4.4200844e-01  1.4432629e-02\n",
      "  3.0918794e+00 -2.1775200e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 96, taking action 1, observation [-5.6120044e-01  1.0794912e-01 -4.2823735e-01  1.3949823e-02\n",
      "  3.0921314e+00 -1.8957101e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 9-step 97, taking action 3, observation [-5.65519631e-01  1.08279645e-01 -4.31787074e-01  1.40398424e-02\n",
      "  3.09232163e+00 -1.74621237e-03  0.00000000e+00  0.00000000e+00]\n",
      "episode 9-step 98, taking action 0, observation [-5.69779158e-01  1.08602434e-01 -4.25852001e-01  1.38294017e-02\n",
      "  3.09246325e+00 -1.60962320e-03  0.00000000e+00  0.00000000e+00]\n",
      "episode 9-step 99, taking action 1, observation [-5.7389265e-01  1.0891200e-01 -4.1127759e-01  1.3345511e-02\n",
      "  3.0925670e+00 -1.4862182e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 10-step 0, taking action 1, observation [-0.00904188  1.4235244  -0.46227378  0.26700395  0.01196529  0.13518257\n",
      "  0.          0.        ]\n",
      "episode 10-step 1, taking action 0, observation [-0.01359653  1.4289343  -0.46229473  0.24036936  0.0187197   0.13510087\n",
      "  0.          0.        ]\n",
      "episode 10-step 2, taking action 0, observation [-0.01815128  1.4337447  -0.4623146   0.21369538  0.02547358  0.13509002\n",
      "  0.          0.        ]\n",
      "episode 10-step 3, taking action 2, observation [-0.02254972  1.4388225  -0.44750318  0.225527    0.03304614  0.15146542\n",
      "  0.          0.        ]\n",
      "episode 10-step 4, taking action 3, observation [-0.02686014  1.443295   -0.43644562  0.19865313  0.03839891  0.10706522\n",
      "  0.          0.        ]\n",
      "episode 10-step 5, taking action 3, observation [-0.03108797  1.4471749  -0.42608222  0.1723524   0.0416676   0.06537973\n",
      "  0.          0.        ]\n",
      "episode 10-step 6, taking action 3, observation [-0.03522797  1.4504672  -0.4150559   0.14629406  0.04271578  0.02096523\n",
      "  0.          0.        ]\n",
      "episode 10-step 7, taking action 3, observation [-0.03929462  1.4531571  -0.40586743  0.11957296  0.04192439 -0.01582905\n",
      "  0.          0.        ]\n",
      "episode 10-step 8, taking action 1, observation [-0.04343824  1.455231   -0.41553378  0.09214497  0.04308222  0.02315855\n",
      "  0.          0.        ]\n",
      "episode 10-step 9, taking action 0, observation [-0.04758186  1.4567051  -0.4155384   0.06547637  0.04423862  0.02313031\n",
      "  0.          0.        ]\n",
      "episode 10-step 10, taking action 0, observation [-0.05172558  1.457579   -0.41554222  0.03880766  0.04539527  0.0231352\n",
      "  0.          0.        ]\n",
      "episode 10-step 11, taking action 0, observation [-0.05586939  1.457853   -0.41554528  0.01213896  0.04655175  0.02313176\n",
      "  0.          0.        ]\n",
      "episode 10-step 12, taking action 3, observation [-0.05992126  1.4575238  -0.40402776 -0.01459232  0.04540252 -0.02298666\n",
      "  0.          0.        ]\n",
      "episode 10-step 13, taking action 1, observation [-0.06403427  1.456595   -0.41170216 -0.04129148  0.04579229  0.0077962\n",
      "  0.          0.        ]\n",
      "episode 10-step 14, taking action 2, observation [-0.06806908  1.4559336  -0.40436897 -0.02942503  0.04667303  0.01761652\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 10-step 15, taking action 2, observation [-0.07214985  1.4555255  -0.40888238 -0.01815958  0.04746195  0.01577976\n",
      "  0.          0.        ]\n",
      "episode 10-step 16, taking action 3, observation [-0.07616358  1.4545205  -0.4004551  -0.04464388  0.04656064 -0.01802783\n",
      "  0.          0.        ]\n",
      "episode 10-step 17, taking action 3, observation [-0.08011017  1.4529239  -0.39203188 -0.07087861  0.04396713 -0.05187533\n",
      "  0.          0.        ]\n",
      "episode 10-step 18, taking action 0, observation [-0.08405666  1.4507275  -0.3920237  -0.09754766  0.04137435 -0.05186049\n",
      "  0.          0.        ]\n",
      "episode 10-step 19, taking action 2, observation [-0.08798809  1.4489756  -0.39072517 -0.07779753  0.03898529 -0.04778543\n",
      "  0.          0.        ]\n",
      "episode 10-step 20, taking action 1, observation [-0.09198169  1.4466286  -0.39852324 -0.10428688  0.03815619 -0.01658362\n",
      "  0.          0.        ]\n",
      "episode 10-step 21, taking action 1, observation [-0.09607182  1.443688   -0.41061988 -0.13073711  0.03974555  0.03178995\n",
      "  0.          0.        ]\n",
      "episode 10-step 22, taking action 1, observation [-0.10025559  1.4401454  -0.4223508  -0.1575594   0.04368319  0.07875983\n",
      "  0.          0.        ]\n",
      "episode 10-step 23, taking action 2, observation [-0.10459328  1.4372361  -0.4371884  -0.12940568  0.04707342  0.06781033\n",
      "  0.          0.        ]\n",
      "episode 10-step 24, taking action 3, observation [-0.10883264  1.4337218  -0.42486787 -0.15622099  0.04799728  0.01847686\n",
      "  0.          0.        ]\n",
      "episode 10-step 25, taking action 2, observation [-0.1130475   1.431047   -0.4227418  -0.11892962  0.0492469   0.02499241\n",
      "  0.          0.        ]\n",
      "episode 10-step 26, taking action 2, observation [-0.11732693  1.4290426  -0.42910552 -0.08911966  0.05038982  0.02285816\n",
      "  0.          0.        ]\n",
      "episode 10-step 27, taking action 0, observation [-0.12160645  1.4264382  -0.42910546 -0.11578643  0.05153272  0.02285817\n",
      "  0.          0.        ]\n",
      "episode 10-step 28, taking action 2, observation [-0.1259963   1.424534   -0.43983617 -0.08466233  0.05237159  0.0167774\n",
      "  0.          0.        ]\n",
      "episode 10-step 29, taking action 2, observation [-0.13044462  1.4235197  -0.44565487 -0.0451107   0.05318008  0.01617003\n",
      "  0.          0.        ]\n",
      "episode 10-step 30, taking action 3, observation [-0.13481626  1.4219207  -0.4360106  -0.07102716  0.05204476 -0.02270624\n",
      "  0.          0.        ]\n",
      "episode 10-step 31, taking action 2, observation [-0.13915634  1.4210958  -0.4332264  -0.0366304   0.05127498 -0.0153956\n",
      "  0.          0.        ]\n",
      "episode 10-step 32, taking action 2, observation [-0.14367037  1.4212091  -0.45006266  0.00508059  0.04994344 -0.02663066\n",
      "  0.          0.        ]\n",
      "episode 10-step 33, taking action 1, observation [-0.14827919  1.4207149  -0.4619481  -0.02200136  0.05099702  0.02107156\n",
      "  0.          0.        ]\n",
      "episode 10-step 34, taking action 2, observation [-0.15281782  1.4206223  -0.4554131  -0.00416683  0.05253207  0.03070062\n",
      "  0.          0.        ]\n",
      "episode 10-step 35, taking action 1, observation [-0.15744515  1.4199349  -0.4665309  -0.03069384  0.05628771  0.07511295\n",
      "  0.          0.        ]\n",
      "episode 10-step 36, taking action 2, observation [-0.16209278  1.4198256  -0.46867743 -0.0050096   0.06015313  0.07730879\n",
      "  0.          0.        ]\n",
      "episode 10-step 37, taking action 1, observation [-0.16680308  1.419119   -0.47651848 -0.03163268  0.06558498  0.10863686\n",
      "  0.          0.        ]\n",
      "episode 10-step 38, taking action 0, observation [-0.17151336  1.4178128  -0.47651824 -0.05830181  0.07101682  0.10863663\n",
      "  0.          0.        ]\n",
      "episode 10-step 39, taking action 3, observation [-0.1761466   1.4159136  -0.466846   -0.08458339  0.07450616  0.06978696\n",
      "  0.          0.        ]\n",
      "episode 10-step 40, taking action 2, observation [-0.18088731  1.41426    -0.4772914  -0.07365671  0.07769322  0.06374127\n",
      "  0.          0.        ]\n",
      "episode 10-step 41, taking action 0, observation [-0.18562813  1.4120065  -0.4772913  -0.10032422  0.08088028  0.06374121\n",
      "  0.          0.        ]\n",
      "episode 10-step 42, taking action 3, observation [-0.19029121  1.409154   -0.4675695  -0.12684412  0.08212233  0.02484097\n",
      "  0.          0.        ]\n",
      "episode 10-step 43, taking action 0, observation [-0.1949544   1.4057016  -0.46756944 -0.15351094  0.08336437  0.02484079\n",
      "  0.          0.        ]\n",
      "episode 10-step 44, taking action 3, observation [-0.199545    1.4016697  -0.45844513 -0.17915982  0.08275337 -0.01222012\n",
      "  0.          0.        ]\n",
      "episode 10-step 45, taking action 0, observation [-0.20413561  1.397038   -0.45844507 -0.2058265   0.08214235 -0.01222024\n",
      "  0.          0.        ]\n",
      "episode 10-step 46, taking action 1, observation [-0.2087852   1.391789   -0.46586347 -0.23334154  0.08303835  0.01792014\n",
      "  0.          0.        ]\n",
      "episode 10-step 47, taking action 0, observation [-0.21343485  1.3859398  -0.46586353 -0.26000828  0.08393437  0.01792034\n",
      "  0.          0.        ]\n",
      "episode 10-step 48, taking action 1, observation [-0.21816535  1.3794837  -0.4760173  -0.28710884  0.08687039  0.05872032\n",
      "  0.          0.        ]\n",
      "episode 10-step 49, taking action 3, observation [-0.22281499  1.372447   -0.46584612 -0.31279033  0.08774478  0.01748771\n",
      "  0.          0.        ]\n",
      "episode 10-step 50, taking action 1, observation [-0.22753854  1.3648037  -0.47512126 -0.3398706   0.09048318  0.05476806\n",
      "  0.          0.        ]\n",
      "episode 10-step 51, taking action 2, observation [-0.232304    1.3572834  -0.47931772 -0.33440787  0.09322975  0.05493131\n",
      "  0.          0.        ]\n",
      "episode 10-step 52, taking action 0, observation [-0.23706946  1.3491632  -0.47931767 -0.36107513  0.09597632  0.05493127\n",
      "  0.          0.        ]\n",
      "episode 10-step 53, taking action 0, observation [-0.24183497  1.3404429  -0.4793176  -0.38774243  0.09872287  0.05493123\n",
      "  0.          0.        ]\n",
      "episode 10-step 54, taking action 3, observation [-0.24653149  1.3311226  -0.47068077 -0.41429964  0.09974419  0.0204265\n",
      "  0.          0.        ]\n",
      "episode 10-step 55, taking action 0, observation [-0.251228    1.3212025  -0.4706807  -0.44096643  0.10076552  0.02042652\n",
      "  0.          0.        ]\n",
      "episode 10-step 56, taking action 3, observation [-0.25584468  1.3106942  -0.4606627  -0.4669641   0.09976532 -0.02000381\n",
      "  0.          0.        ]\n",
      "episode 10-step 57, taking action 3, observation [-0.2603732   1.2996048  -0.44957414 -0.49264824  0.09651849 -0.06493659\n",
      "  0.          0.        ]\n",
      "episode 10-step 58, taking action 1, observation [-0.2649952   1.2879083  -0.4612988  -0.51978385  0.09562579 -0.01785414\n",
      "  0.          0.        ]\n",
      "episode 10-step 59, taking action 0, observation [-0.26961714  1.275612   -0.4612988  -0.54645056  0.09473309 -0.01785379\n",
      "  0.          0.        ]\n",
      "episode 10-step 60, taking action 2, observation [-0.27425823  1.2641145  -0.4635268  -0.5109609   0.09416013 -0.01145927\n",
      "  0.          0.        ]\n",
      "episode 10-step 61, taking action 0, observation [-0.27889928  1.2520171  -0.46352682 -0.5376276   0.09358718 -0.01145923\n",
      "  0.          0.        ]\n",
      "episode 10-step 62, taking action 0, observation [-0.28354034  1.2393196  -0.46352682 -0.5642944   0.09301421 -0.01145945\n",
      "  0.          0.        ]\n",
      "episode 10-step 63, taking action 2, observation [-0.2883841   1.226612   -0.4830245  -0.5647      0.09167625 -0.02675917\n",
      "  0.          0.        ]\n",
      "episode 10-step 64, taking action 3, observation [-0.29313236  1.2133055  -0.47105685 -0.591171    0.08794498 -0.07462539\n",
      "  0.          0.        ]\n",
      "episode 10-step 65, taking action 1, observation [-0.2979749   1.1993786  -0.48291692 -0.6188947   0.08661482 -0.02660312\n",
      "  0.          0.        ]\n",
      "episode 10-step 66, taking action 0, observation [-0.30281752  1.1848518  -0.4829169  -0.6455614   0.08528467 -0.02660306\n",
      "  0.          0.        ]\n",
      "episode 10-step 67, taking action 2, observation [-0.30758303  1.1712768  -0.475977   -0.6033001   0.08471684 -0.01135662\n",
      "  0.          0.        ]\n",
      "episode 10-step 68, taking action 3, observation [-0.312263    1.1571093  -0.46525067 -0.6295124   0.08199411 -0.05445486\n",
      "  0.          0.        ]\n",
      "episode 10-step 69, taking action 2, observation [-0.3169195   1.1434728  -0.46329507 -0.60593694  0.07966501 -0.04658214\n",
      "  0.          0.        ]\n",
      "episode 10-step 70, taking action 0, observation [-0.32157597  1.1292363  -0.46329504 -0.632604    0.0773359  -0.04658191\n",
      "  0.          0.        ]\n",
      "episode 10-step 71, taking action 0, observation [-0.32623243  1.1144     -0.46329504 -0.6592711   0.07500681 -0.04658172\n",
      "  0.          0.        ]\n",
      "episode 10-step 72, taking action 2, observation [-0.33097133  1.0998092  -0.47134763 -0.6483663   0.07249254 -0.05028539\n",
      "  0.          0.        ]\n",
      "episode 10-step 73, taking action 0, observation [-0.33571023  1.0846181  -0.4713475  -0.6750335   0.06997827 -0.05028538\n",
      "  0.          0.        ]\n",
      "episode 10-step 74, taking action 0, observation [-0.34044915  1.0688272  -0.47134748 -0.7017006   0.067464   -0.05028551\n",
      "  0.          0.        ]\n",
      "episode 10-step 75, taking action 3, observation [-0.34511715  1.0524507  -0.46244344 -0.72765213  0.06315234 -0.08623315\n",
      "  0.          0.        ]\n",
      "episode 10-step 76, taking action 3, observation [-0.3497018   1.0354915  -0.45197067 -0.75348705  0.05672765 -0.12849383\n",
      "  0.          0.        ]\n",
      "episode 10-step 77, taking action 2, observation [-0.35420537  1.0185014  -0.44433475 -0.7548963   0.05077698 -0.11901325\n",
      "  0.          0.        ]\n",
      "episode 10-step 78, taking action 3, observation [-0.3586461   1.0009197  -0.43644714 -0.7811671   0.04324115 -0.15071668\n",
      "  0.          0.        ]\n",
      "episode 10-step 79, taking action 3, observation [-0.3630136   0.9827444  -0.42724878 -0.80755264  0.03386036 -0.18761605\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 10-step 80, taking action 3, observation [-0.36730343  0.96396536 -0.417516   -0.83440477  0.02253399 -0.22652741\n",
      "  0.          0.        ]\n",
      "episode 10-step 81, taking action 3, observation [-0.37152594  0.94458073 -0.4090678  -0.8613977   0.00951909 -0.26029822\n",
      "  0.          0.        ]\n",
      "episode 10-step 82, taking action 0, observation [-0.3757485   0.9245982  -0.4090674  -0.88807845 -0.00349566 -0.2602952\n",
      "  0.          0.        ]\n",
      "episode 10-step 83, taking action 1, observation [-0.38003692  0.90402174 -0.41734213 -0.9145823  -0.01485287 -0.22714429\n",
      "  0.          0.        ]\n",
      "episode 10-step 84, taking action 0, observation [-0.3843254   0.882847   -0.41734225 -0.9412598  -0.02620999 -0.2271423\n",
      "  0.          0.        ]\n",
      "episode 10-step 85, taking action 2, observation [-0.38870454  0.8621388  -0.42590064 -0.92062724 -0.03807304 -0.23726098\n",
      "  0.          0.        ]\n",
      "episode 10-step 86, taking action 3, observation [-0.39299217  0.8408303  -0.4144318  -0.9474716  -0.05223361 -0.28321165\n",
      "  0.          0.        ]\n",
      "episode 10-step 87, taking action 0, observation [-0.39727968  0.8189245  -0.4144329  -0.9741551  -0.06639398 -0.28320777\n",
      "  0.          0.        ]\n",
      "episode 10-step 88, taking action 2, observation [-0.4015531   0.7972232  -0.41293034 -0.96520036 -0.08065387 -0.28519768\n",
      "  0.          0.        ]\n",
      "episode 10-step 89, taking action 0, observation [-0.40582633  0.7749247  -0.41293225 -0.991884   -0.09491355 -0.28519368\n",
      "  0.          0.        ]\n",
      "episode 10-step 90, taking action 0, observation [-0.41009936  0.75202876 -0.41293448 -1.0185676  -0.10917304 -0.2851897\n",
      "  0.          0.        ]\n",
      "episode 10-step 91, taking action 2, observation [-0.41437164  0.7293862  -0.41259137 -1.0074713  -0.12371573 -0.29085392\n",
      "  0.          0.        ]\n",
      "episode 10-step 92, taking action 2, observation [-0.41843867  0.7070409  -0.39269668 -0.99434364 -0.137658   -0.27884513\n",
      "  0.          0.        ]\n",
      "episode 10-step 93, taking action 2, observation [-0.4225332   0.684863   -0.3950267  -0.98707885 -0.15205145 -0.28786892\n",
      "  0.          0.        ]\n",
      "episode 10-step 94, taking action 3, observation [-0.4265316   0.66206807 -0.38299018 -1.0149186  -0.16889425 -0.33685598\n",
      "  0.          0.        ]\n",
      "episode 10-step 95, taking action 3, observation [-0.43046993  0.6386633  -0.3755155  -1.0424025  -0.18726383 -0.36739215\n",
      "  0.          0.        ]\n",
      "episode 10-step 96, taking action 0, observation [-0.43440777  0.61466306 -0.3755229  -1.0690969  -0.20563301 -0.36738363\n",
      "  0.          0.        ]\n",
      "episode 10-step 97, taking action 2, observation [-0.43816438  0.59107983 -0.3576521  -1.0507457  -0.22382243 -0.36378828\n",
      "  0.          0.        ]\n",
      "episode 10-step 98, taking action 2, observation [-0.44183964  0.56794924 -0.34924394 -1.0309045  -0.24236558 -0.37086326\n",
      "  0.          0.        ]\n",
      "episode 10-step 99, taking action 2, observation [-0.44539118  0.5456921  -0.33643106 -0.99240774 -0.26144394 -0.38156694\n",
      "  0.          0.        ]\n",
      "episode 11-step 0, taking action 0, observation [ 1.0315895e-03  1.4074628e+00  5.2162211e-02 -8.9666970e-02\n",
      " -1.1756277e-03 -1.1693917e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 11-step 1, taking action 0, observation [ 0.00154734  1.4048454   0.05216402 -0.11633194 -0.00175992 -0.0116871\n",
      "  0.          0.        ]\n",
      "episode 11-step 2, taking action 0, observation [ 0.00206308  1.4016279   0.05216577 -0.14300314 -0.00234414 -0.01168538\n",
      "  0.          0.        ]\n",
      "episode 11-step 3, taking action 2, observation [ 0.00249357  1.3987291   0.04405589 -0.12883648 -0.00334357 -0.01999068\n",
      "  0.          0.        ]\n",
      "episode 11-step 4, taking action 3, observation [ 0.00299492  1.3952253   0.05294684 -0.1557353  -0.00612486 -0.05563102\n",
      "  0.          0.        ]\n",
      "episode 11-step 5, taking action 2, observation [ 0.00348196  1.3917546   0.05159424 -0.1542664  -0.0089863  -0.05723413\n",
      "  0.          0.        ]\n",
      "episode 11-step 6, taking action 1, observation [ 0.00388927  1.3876731   0.04159497 -0.18140534 -0.00984292 -0.01713342\n",
      "  0.          0.        ]\n",
      "episode 11-step 7, taking action 3, observation [ 0.00438452  1.38298     0.05261945 -0.20860963 -0.01291192 -0.06138571\n",
      "  0.          0.        ]\n",
      "episode 11-step 8, taking action 1, observation [ 0.00480995  1.3776964   0.04386596 -0.23483466 -0.01422034 -0.02617064\n",
      "  0.          0.        ]\n",
      "episode 11-step 9, taking action 1, observation [ 0.0051384   1.371809    0.03169081 -0.2616559  -0.01308965  0.02261582\n",
      "  0.          0.        ]\n",
      "episode 11-step 10, taking action 3, observation [ 0.00553055  1.3653295   0.0396974  -0.2879794  -0.01356357 -0.00947894\n",
      "  0.          0.        ]\n",
      "episode 11-step 11, taking action 3, observation [ 0.00599537  1.3582541   0.04881238 -0.31448963 -0.01586175 -0.04596801\n",
      "  0.          0.        ]\n",
      "episode 11-step 12, taking action 0, observation [ 0.00646029  1.3505785   0.04881928 -0.34115875 -0.01815915 -0.04595222\n",
      "  0.          0.        ]\n",
      "episode 11-step 13, taking action 0, observation [ 0.0069253   1.3423032   0.0488258  -0.36782777 -0.0204566  -0.04595316\n",
      "  0.          0.        ]\n",
      "episode 11-step 14, taking action 2, observation [ 0.00735083  1.3343999   0.04513246 -0.35128614 -0.02300175 -0.05090817\n",
      "  0.          0.        ]\n",
      "episode 11-step 15, taking action 3, observation [ 0.00786638  1.3258926   0.05641237 -0.37818938 -0.0278066  -0.09610575\n",
      "  0.          0.        ]\n",
      "episode 11-step 16, taking action 0, observation [ 0.00838213  1.3167856   0.05642671 -0.40485635 -0.03260991 -0.09607476\n",
      "  0.          0.        ]\n",
      "episode 11-step 17, taking action 3, observation [ 0.00897999  1.3070812   0.06672902 -0.43146467 -0.03947262 -0.13726662\n",
      "  0.          0.        ]\n",
      "episode 11-step 18, taking action 2, observation [ 0.00966444  1.2981389   0.07514059 -0.39764053 -0.04609768 -0.13251346\n",
      "  0.          0.        ]\n",
      "episode 11-step 19, taking action 0, observation [ 0.01034908  1.2885964   0.07515837 -0.42432392 -0.05272235 -0.13250504\n",
      "  0.          0.        ]\n",
      "episode 11-step 20, taking action 3, observation [ 0.01110659  1.2784383   0.084305   -0.4517941  -0.06118803 -0.16932908\n",
      "  0.          0.        ]\n",
      "episode 11-step 21, taking action 1, observation [ 0.01178303  1.2676783   0.07412719 -0.4785036  -0.06761269 -0.12850448\n",
      "  0.          0.        ]\n",
      "episode 11-step 22, taking action 1, observation [ 0.01236353  1.2563305   0.06208173 -0.50453264 -0.07161034 -0.07996006\n",
      "  0.          0.        ]\n",
      "episode 11-step 23, taking action 3, observation [ 0.01301737  1.2443666   0.07128742 -0.5320253  -0.07746734 -0.11715049\n",
      "  0.          0.        ]\n",
      "episode 11-step 24, taking action 2, observation [ 0.013622    1.233354    0.06697828 -0.48980945 -0.08392043 -0.1290613\n",
      "  0.          0.        ]\n",
      "episode 11-step 25, taking action 2, observation [ 0.01418285  1.2223034   0.06294937 -0.49153507 -0.09072049 -0.1360009\n",
      "  0.          0.        ]\n",
      "episode 11-step 26, taking action 1, observation [ 0.01468     1.2106719   0.05491975 -0.5172778  -0.09588733 -0.10333683\n",
      "  0.          0.        ]\n",
      "episode 11-step 27, taking action 0, observation [ 0.01517706  1.1984409   0.05491943 -0.5439467  -0.10105418 -0.10333677\n",
      "  0.          0.        ]\n",
      "episode 11-step 28, taking action 1, observation [ 0.01559162  1.1856306   0.04454053 -0.5695606  -0.10411284 -0.06117348\n",
      "  0.          0.        ]\n",
      "episode 11-step 29, taking action 1, observation [ 0.01594753  1.1722386   0.03715914 -0.5953039  -0.10566493 -0.03104185\n",
      "  0.          0.        ]\n",
      "episode 11-step 30, taking action 2, observation [ 0.01653624  1.159158    0.0596512  -0.5814171  -0.10644277 -0.01555656\n",
      "  0.          0.        ]\n",
      "episode 11-step 31, taking action 0, observation [ 0.01712494  1.1454775   0.0596512  -0.6080838  -0.10722061 -0.01555668\n",
      "  0.          0.        ]\n",
      "episode 11-step 32, taking action 3, observation [ 0.01777658  1.131186    0.06756005 -0.63535196 -0.10959754 -0.04753868\n",
      "  0.          0.        ]\n",
      "episode 11-step 33, taking action 1, observation [ 1.8332005e-02  1.1163046e+00  5.5494480e-02 -6.6138726e-01\n",
      " -1.0954661e-01  1.0185386e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 11-step 34, taking action 2, observation [ 0.01902008  1.1021378   0.06858789 -0.6296205  -0.10932894  0.00435337\n",
      "  0.          0.        ]\n",
      "episode 11-step 35, taking action 3, observation [ 0.01978302  1.0873697   0.07795171 -0.6564834  -0.11098333 -0.03308745\n",
      "  0.          0.        ]\n",
      "episode 11-step 36, taking action 2, observation [ 0.02075319  1.0727935   0.09800346 -0.6479095  -0.11195453 -0.01942391\n",
      "  0.          0.        ]\n",
      "episode 11-step 37, taking action 1, observation [ 0.02164755  1.0576373   0.08844937 -0.67352974 -0.11098009  0.01948869\n",
      "  0.          0.        ]\n",
      "episode 11-step 38, taking action 2, observation [ 0.02269869  1.0425258   0.10362291 -0.67151827 -0.10950492  0.02950383\n",
      "  0.          0.        ]\n",
      "episode 11-step 39, taking action 3, observation [ 2.3810577e-02  1.0268145e+00  1.1123409e-01 -6.9827837e-01\n",
      " -1.0954860e-01 -8.7384705e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 11-step 40, taking action 0, observation [ 2.49224659e-02  1.01050329e+00  1.11234091e-01 -7.24944949e-01\n",
      " -1.09592296e-01 -8.73902813e-04  0.00000000e+00  0.00000000e+00]\n",
      "episode 11-step 41, taking action 0, observation [ 2.6034450e-02  9.9359220e-01  1.1123409e-01 -7.5161159e-01\n",
      " -1.0963600e-01 -8.7391789e-04  0.0000000e+00  0.0000000e+00]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 11-step 42, taking action 2, observation [ 0.0270731   0.97685623  0.10451229 -0.74386775 -0.110278   -0.01284003\n",
      "  0.          0.        ]\n",
      "episode 11-step 43, taking action 1, observation [ 0.0280509   0.9595296   0.0968781  -0.7700115  -0.1093787   0.01798613\n",
      "  0.          0.        ]\n",
      "episode 11-step 44, taking action 1, observation [ 0.02895946  0.9416106   0.08818564 -0.79620475 -0.10672951  0.05298392\n",
      "  0.          0.        ]\n",
      "episode 11-step 45, taking action 1, observation [ 0.02978449  0.9231118   0.07768741 -0.8218346  -0.10194805  0.09562925\n",
      "  0.          0.        ]\n",
      "episode 11-step 46, taking action 0, observation [ 0.03060951  0.90401334  0.07768711 -0.84850323 -0.0971666   0.09562912\n",
      "  0.          0.        ]\n",
      "episode 11-step 47, taking action 2, observation [ 0.03135281  0.8854122   0.07021486 -0.826457   -0.09309003  0.08153149\n",
      "  0.          0.        ]\n",
      "episode 11-step 48, taking action 2, observation [ 0.03201742  0.866892    0.06290244 -0.822908   -0.08957124  0.07037556\n",
      "  0.          0.        ]\n",
      "episode 11-step 49, taking action 1, observation [ 0.03262024  0.84777635  0.05516668 -0.84928226 -0.08449962  0.10143254\n",
      "  0.          0.        ]\n",
      "episode 11-step 50, taking action 0, observation [ 0.03322315  0.82806116  0.0551664  -0.8759511  -0.07942801  0.10143236\n",
      "  0.          0.        ]\n",
      "episode 11-step 51, taking action 1, observation [ 0.03375511  0.8077572   0.04625671 -0.902045   -0.07256053  0.13734975\n",
      "  0.          0.        ]\n",
      "episode 11-step 52, taking action 1, observation [ 0.03422251  0.78685385  0.03816312 -0.9286505  -0.06407483  0.16971418\n",
      "  0.          0.        ]\n",
      "episode 11-step 53, taking action 3, observation [ 0.03476534  0.76533407  0.04763732 -0.95616037 -0.05750306  0.1314353\n",
      "  0.          0.        ]\n",
      "episode 11-step 54, taking action 3, observation [ 0.03540106  0.7432108   0.05929098 -0.98309743 -0.05326712  0.08471891\n",
      "  0.          0.        ]\n",
      "episode 11-step 55, taking action 2, observation [ 0.03591633  0.72167933  0.04799778 -0.95683384 -0.04978356  0.06967144\n",
      "  0.          0.        ]\n",
      "episode 11-step 56, taking action 3, observation [ 0.0365056   0.6995434   0.05728319 -0.9837633  -0.04816217  0.03242796\n",
      "  0.          0.        ]\n",
      "episode 11-step 57, taking action 1, observation [ 0.03699808  0.67680734  0.04516107 -1.0103699  -0.04411487  0.08094572\n",
      "  0.          0.        ]\n",
      "episode 11-step 58, taking action 0, observation [ 0.03749065  0.6534714   0.04516097 -1.037038   -0.04006758  0.08094564\n",
      "  0.          0.        ]\n",
      "episode 11-step 59, taking action 0, observation [ 0.03798313  0.6295358   0.04516088 -1.0637059  -0.03602031  0.08094554\n",
      "  0.          0.        ]\n",
      "episode 11-step 60, taking action 3, observation [ 0.03856564  0.6049921   0.05643923 -1.0907845  -0.03423584  0.03568928\n",
      "  0.          0.        ]\n",
      "episode 11-step 61, taking action 3, observation [ 0.03921232  0.5798411   0.06449582 -1.1178166  -0.03406862  0.00334426\n",
      "  0.          0.        ]\n",
      "episode 11-step 62, taking action 2, observation [ 0.0397562   0.55502015  0.05479484 -1.103162   -0.03447432 -0.00811388\n",
      "  0.          0.        ]\n",
      "episode 11-step 63, taking action 3, observation [ 0.04038706  0.5295827   0.06572592 -1.1306145  -0.03707809 -0.05207562\n",
      "  0.          0.        ]\n",
      "episode 11-step 64, taking action 1, observation [ 0.04095793  0.50355166  0.05818022 -1.1569632  -0.03816733 -0.02178445\n",
      "  0.          0.        ]\n",
      "episode 11-step 65, taking action 3, observation [ 0.04159536  0.47691372  0.06655814 -1.183982   -0.04093822 -0.0554178\n",
      "  0.          0.        ]\n",
      "episode 11-step 66, taking action 2, observation [ 0.0423873   0.45058832  0.08138387 -1.1700808  -0.04311093 -0.04345403\n",
      "  0.          0.        ]\n",
      "episode 11-step 67, taking action 2, observation [ 0.04312763  0.42426354  0.07655521 -1.1700622  -0.04560141 -0.04980977\n",
      "  0.          0.        ]\n",
      "episode 11-step 68, taking action 3, observation [ 0.04393797  0.39733186  0.08534338 -1.197101   -0.04985645 -0.08510087\n",
      "  0.          0.        ]\n",
      "episode 11-step 69, taking action 3, observation [ 0.0448432   0.36979267  0.09723251 -1.2242019  -0.05649769 -0.13282478\n",
      "  0.          0.        ]\n",
      "episode 11-step 70, taking action 3, observation [ 0.04582367  0.34164092  0.10669782 -1.2515414  -0.06504583 -0.1709627\n",
      "  0.          0.        ]\n",
      "episode 11-step 71, taking action 1, observation [ 0.04674349  0.31289408  0.09907897 -1.2779609  -0.07206526 -0.1403887\n",
      "  0.          0.        ]\n",
      "episode 11-step 72, taking action 2, observation [ 0.04765558  0.28477472  0.09860733 -1.2501248  -0.07939306 -0.146556\n",
      "  0.          0.        ]\n",
      "episode 11-step 73, taking action 2, observation [ 0.04875288  0.25712135  0.11652545 -1.2294121  -0.08612736 -0.1346861\n",
      "  0.          0.        ]\n",
      "episode 11-step 74, taking action 1, observation [ 0.04978323  0.2288852   0.10810491 -1.2552378  -0.09115412 -0.10053527\n",
      "  0.          0.        ]\n",
      "episode 11-step 75, taking action 0, observation [ 0.05081367  0.20004946  0.10810461 -1.2819065  -0.09618087 -0.10053507\n",
      "  0.          0.        ]\n",
      "episode 11-step 76, taking action 1, observation [ 0.05178099  0.17062606  0.10018317 -1.3079331  -0.09960528 -0.06848826\n",
      "  0.          0.        ]\n",
      "episode 11-step 77, taking action 0, observation [ 0.0527483   0.14060284  0.10018303 -1.3346007  -0.10302968 -0.0684882\n",
      "  0.          0.        ]\n",
      "episode 11-step 78, taking action 0, observation [ 0.05371561  0.10997973  0.10018289 -1.3612684  -0.1064541  -0.06848816\n",
      "  0.          0.        ]\n",
      "episode 11-step 79, taking action 0, observation [ 0.05468292  0.07875682  0.10018275 -1.387936   -0.1098785  -0.06848811\n",
      "  0.          0.        ]\n",
      "episode 11-step 80, taking action 1, observation [ 0.05556002  0.04694668  0.08885502 -1.4138677  -0.1110176  -0.02278194\n",
      "  0.          0.        ]\n",
      "episode 11-step 81, taking action 2, observation [ 0.05662289  0.0160487   0.10709579 -1.373304   -0.11181892 -0.01602646\n",
      "  0.          0.        ]\n",
      "episode 11-step 82, taking action 0, observation [ 0.05768585 -0.01544923  0.10709579 -1.3999705  -0.11262026 -0.01602642\n",
      "  1.          0.        ]\n",
      "episode 11-step 83, taking action 3, observation [ 0.0593873  -0.04201991  0.01026326 -0.85695475 -0.00668988  4.902391\n",
      "  1.          1.        ]\n",
      "episode 11-step 84, taking action 0, observation [ 5.9486769e-02 -4.2787410e-02  1.4160995e-07  1.3199646e-07\n",
      "  2.1575199e-04 -1.0543664e-06  0.0000000e+00  1.0000000e+00]\n",
      "episode 11-step 85, taking action 1, observation [ 5.9542943e-02 -4.2786304e-02 -1.2319977e-05 -2.2191671e-09\n",
      "  2.9005497e-04  8.9348760e-09  0.0000000e+00  1.0000000e+00]\n",
      "episode 11-step 86, taking action 1, observation [ 5.9479810e-02 -4.2778291e-02 -6.5941131e-03 -3.6428294e-09\n",
      "  1.8296445e-04  9.0803953e-10  0.0000000e+00  1.0000000e+00]\n",
      "episode 11-step 87, taking action 2, observation [ 0.05938454 -0.04207806 -0.00742889  0.03112298 -0.0018882  -0.04143376\n",
      "  0.          1.        ]\n",
      "episode 11-step 88, taking action 0, observation [ 0.0592946  -0.04185379 -0.00633225  0.0099663  -0.00452947 -0.05283944\n",
      "  0.          1.        ]\n",
      "episode 11-step 89, taking action 2, observation [ 0.0591239  -0.04150696 -0.01293034  0.01539741 -0.00861888 -0.0818007\n",
      "  0.          1.        ]\n",
      "episode 11-step 90, taking action 3, observation [ 5.91084473e-02 -4.13315073e-02  5.50439756e-04  7.78869959e-03\n",
      " -1.06939515e-02 -4.15102541e-02  0.00000000e+00  1.00000000e+00]\n",
      "episode 11-step 91, taking action 2, observation [ 0.05898581 -0.04095675 -0.00778004  0.01662012 -0.01512948 -0.08872171\n",
      "  0.          1.        ]\n",
      "episode 11-step 92, taking action 1, observation [ 0.05882626 -0.04082854 -0.01441773  0.00568196 -0.01665076 -0.03043763\n",
      "  0.          1.        ]\n",
      "episode 11-step 93, taking action 2, observation [ 0.05853472 -0.0403927  -0.02393024  0.01930484 -0.02182722 -0.1035388\n",
      "  0.          1.        ]\n",
      "episode 11-step 94, taking action 0, observation [ 0.05829697 -0.04015656 -0.02093243  0.01045251 -0.02464136 -0.05629255\n",
      "  0.          1.        ]\n",
      "episode 11-step 95, taking action 1, observation [ 0.05801449 -0.04007416 -0.02725998  0.00364738 -0.02562521 -0.01968768\n",
      "  0.          1.        ]\n",
      "episode 11-step 96, taking action 0, observation [ 0.05777846 -0.03998944 -0.02258951  0.0037459  -0.02663645 -0.02023556\n",
      "  0.          1.        ]\n",
      "episode 11-step 97, taking action 3, observation [ 0.05767126 -0.03979099 -0.00831426  0.00877525 -0.02900812 -0.0474432\n",
      "  0.          1.        ]\n",
      "episode 11-step 98, taking action 1, observation [ 0.05752411 -0.03955649 -0.01188029  0.01036551 -0.0318151  -0.05614904\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          1.        ]\n",
      "episode 11-step 99, taking action 1, observation [ 0.05733623 -0.0392661  -0.01526697  0.01282617 -0.03529656 -0.06963754\n",
      "  0.          1.        ]\n",
      "episode 12-step 0, taking action 0, observation [ 7.6427462e-04  1.4215952e+00  3.8645204e-02  2.2437853e-01\n",
      " -8.6916011e-04 -8.6616939e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 1, taking action 2, observation [ 1.0860444e-03  1.4268861e+00  3.2912333e-02  2.3515220e-01\n",
      " -1.5940041e-03 -1.4498045e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 2, taking action 0, observation [ 1.4078140e-03  1.4315767e+00  3.2914575e-02  2.0847416e-01\n",
      " -2.3184500e-03 -1.4490709e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 3, taking action 1, observation [ 1.6601563e-03  1.4356639e+00  2.4207700e-02  1.8164936e-01\n",
      " -1.2967541e-03  2.0435896e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 4, taking action 1, observation [1.8512725e-03 1.4391590e+00 1.6504582e-02 1.5534143e-01 1.2680647e-03\n",
      " 5.1301289e-02 0.0000000e+00 0.0000000e+00]\n",
      "episode 12-step 5, taking action 0, observation [0.0020422  1.4420542  0.01649686 0.12867257 0.00383218 0.05128738\n",
      " 0.         0.        ]\n",
      "episode 12-step 6, taking action 0, observation [0.00223303 1.4443495  0.01648939 0.10200384 0.00639625 0.05128598\n",
      " 0.         0.        ]\n",
      "episode 12-step 7, taking action 2, observation [ 2.2481917e-03  1.4467931e+00 -2.5529339e-04  1.0859895e-01\n",
      "  8.1394417e-03  3.4867369e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 8, taking action 0, observation [ 2.2632598e-03  1.4486368e+00 -2.6004779e-04  8.1924245e-02\n",
      "  9.8830750e-03  3.4875907e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 9, taking action 3, observation [ 0.00236912  1.4498886   0.0111393   0.05564604  0.00933864 -0.01088963\n",
      "  0.          0.        ]\n",
      "episode 12-step 10, taking action 3, observation [ 0.00255909  1.4505494   0.02169066  0.02937836  0.00667941 -0.05318951\n",
      "  0.          0.        ]\n",
      "episode 12-step 11, taking action 1, observation [ 0.0026638   1.4506161   0.01100008  0.00296704  0.00616466 -0.01029617\n",
      "  0.          0.        ]\n",
      "episode 12-step 12, taking action 2, observation [ 0.00261869  1.4511609  -0.00328936  0.02421514  0.00496252 -0.02404512\n",
      "  0.          0.        ]\n",
      "episode 12-step 13, taking action 2, observation [ 0.00264378  1.4523524   0.00336954  0.05295737  0.00411129 -0.01702624\n",
      "  0.          0.        ]\n",
      "episode 12-step 14, taking action 3, observation [ 2.7544976e-03  1.4529476e+00  1.4103958e-02  2.6464334e-02\n",
      "  1.1086096e-03 -6.0058784e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 15, taking action 1, observation [ 2.7927398e-03  1.4529532e+00  5.0224843e-03  2.4487122e-04\n",
      " -7.1103837e-05 -2.3596196e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 16, taking action 0, observation [ 2.8310777e-03  1.4523587e+00  5.0252946e-03 -2.6423955e-02\n",
      " -1.2518367e-03 -2.3616869e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 17, taking action 2, observation [ 0.00304174  1.4523052   0.02145098 -0.00237531 -0.00162975 -0.00755918\n",
      "  0.          0.        ]\n",
      "episode 12-step 18, taking action 2, observation [ 3.4254075e-03  1.4522917e+00  3.7932925e-02 -5.9985771e-04\n",
      " -1.2032457e-03  8.5308049e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 19, taking action 2, observation [ 3.8082122e-03  1.4530498e+00  3.7857968e-02  3.3691633e-02\n",
      " -7.8606769e-04  8.3442209e-03  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 20, taking action 1, observation [0.00410051 1.4532017  0.02649114 0.00675411 0.00190833 0.05389305\n",
      " 0.         0.        ]\n",
      "episode 12-step 21, taking action 3, observation [ 0.00448828  1.4527587   0.03848587 -0.01968906  0.00219674  0.0057688\n",
      "  0.          0.        ]\n",
      "episode 12-step 22, taking action 1, observation [ 0.00478306  1.4517046   0.0268125  -0.04685235  0.00482531  0.0525759\n",
      "  0.          0.        ]\n",
      "episode 12-step 23, taking action 1, observation [ 0.00501032  1.4500477   0.01836315 -0.07366286  0.00914305  0.08636273\n",
      "  0.          0.        ]\n",
      "episode 12-step 24, taking action 3, observation [ 0.0053154   1.4477696   0.02811397 -0.10126865  0.01150802  0.04729955\n",
      "  0.          0.        ]\n",
      "episode 12-step 25, taking action 3, observation [ 0.00568476  1.4448864   0.03617375 -0.12815024  0.01225996  0.01503851\n",
      "  0.          0.        ]\n",
      "episode 12-step 26, taking action 3, observation [ 0.00611744  1.4414026   0.04411794 -0.1548333   0.01142113 -0.0167765\n",
      "  0.          0.        ]\n",
      "episode 12-step 27, taking action 3, observation [ 0.00661736  1.4373106   0.05254514 -0.1818438   0.00889621 -0.05049814\n",
      "  0.          0.        ]\n",
      "episode 12-step 28, taking action 3, observation [ 0.0072052   1.4326273   0.06357963 -0.20812291  0.00416026 -0.09471911\n",
      "  0.          0.        ]\n",
      "episode 12-step 29, taking action 0, observation [ 7.7930451e-03  1.4273444e+00  6.3579649e-02 -2.3479144e-01\n",
      " -5.7568954e-04 -9.4718985e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 12-step 30, taking action 1, observation [ 0.00831184  1.4214659   0.05491855 -0.26127166 -0.00357707 -0.06002751\n",
      "  0.          0.        ]\n",
      "episode 12-step 31, taking action 0, observation [ 0.00883064  1.4149876   0.05491854 -0.2879391  -0.00657845 -0.06002749\n",
      "  0.          0.        ]\n",
      "episode 12-step 32, taking action 1, observation [ 0.00928268  1.4079218   0.04653968 -0.314037   -0.00790044 -0.02643983\n",
      "  0.          0.        ]\n",
      "episode 12-step 33, taking action 1, observation [ 0.00964394  1.4002471   0.03516567 -0.34108916 -0.00694582  0.01909204\n",
      "  0.          0.        ]\n",
      "episode 12-step 34, taking action 3, observation [ 0.0100934   1.391982    0.04622433 -0.3673488  -0.00820476 -0.0251787\n",
      "  0.          0.        ]\n",
      "episode 12-step 35, taking action 3, observation [ 0.0106226   1.3831078   0.05621996 -0.39443368 -0.01146671 -0.06523904\n",
      "  0.          0.        ]\n",
      "episode 12-step 36, taking action 3, observation [ 0.01122046  1.3736327   0.06484364 -0.4211647  -0.01645586 -0.09978279\n",
      "  0.          0.        ]\n",
      "episode 12-step 37, taking action 3, observation [ 0.01188087  1.3635455   0.07268587 -0.4484021  -0.02301886 -0.13126019\n",
      "  0.          0.        ]\n",
      "episode 12-step 38, taking action 1, observation [ 0.01245337  1.3528738   0.06165761 -0.4743698  -0.02736773 -0.0869775\n",
      "  0.          0.        ]\n",
      "episode 12-step 39, taking action 3, observation [ 0.01310186  1.3415952   0.07118019 -0.5014034  -0.03362674 -0.12517993\n",
      "  0.          0.        ]\n",
      "episode 12-step 40, taking action 2, observation [ 0.01392212  1.3309172   0.08768798 -0.47470888 -0.03921542 -0.11177369\n",
      "  0.          0.        ]\n",
      "episode 12-step 41, taking action 2, observation [ 0.01474428  1.3208872   0.08801832 -0.4459439  -0.04494617 -0.11461508\n",
      "  0.          0.        ]\n",
      "episode 12-step 42, taking action 1, observation [ 0.01550236  1.3102635   0.07996132 -0.47229466 -0.04905962 -0.08226898\n",
      "  0.          0.        ]\n",
      "episode 12-step 43, taking action 2, observation [ 0.01628151  1.300403    0.0821875  -0.4383823  -0.05328126 -0.08443303\n",
      "  0.          0.        ]\n",
      "episode 12-step 44, taking action 2, observation [ 0.01725826  1.291365    0.10126374 -0.4018217  -0.05682328 -0.07084049\n",
      "  0.          0.        ]\n",
      "episode 12-step 45, taking action 3, observation [ 0.01829643  1.2817154   0.10896808 -0.42907494 -0.06191834 -0.10190125\n",
      "  0.          0.        ]\n",
      "episode 12-step 46, taking action 1, observation [ 0.01927433  1.2714734   0.10140616 -0.45535386 -0.06549281 -0.07148955\n",
      "  0.          0.        ]\n",
      "episode 12-step 47, taking action 3, observation [ 0.02032747  1.2606351   0.11082671 -0.48195407 -0.07094814 -0.10910647\n",
      "  0.          0.        ]\n",
      "episode 12-step 48, taking action 2, observation [ 0.0215229   1.2505931   0.12470003 -0.44656685 -0.07605188 -0.1020749\n",
      "  0.          0.        ]\n",
      "episode 12-step 49, taking action 2, observation [ 0.02273178  1.241475    0.12634411 -0.40553632 -0.08145037 -0.10797027\n",
      "  0.          0.        ]\n",
      "episode 12-step 50, taking action 1, observation [ 0.02387142  1.2317698   0.11763147 -0.43154353 -0.0850895  -0.07278235\n",
      "  0.          0.        ]\n",
      "episode 12-step 51, taking action 2, observation [ 0.02525291  1.2227287   0.14102995 -0.40199366 -0.08794472 -0.05710458\n",
      "  0.          0.        ]\n",
      "episode 12-step 52, taking action 1, observation [ 0.0265584   1.213098    0.13149339 -0.42808226 -0.08887829 -0.01867135\n",
      "  0.          0.        ]\n",
      "episode 12-step 53, taking action 2, observation [ 0.02809467  1.203726    0.15373072 -0.41654262 -0.08898362 -0.00210638\n",
      "  0.          0.        ]\n",
      "episode 12-step 54, taking action 1, observation [ 0.02954349  1.1937764   0.1427345  -0.44208065 -0.08685749  0.04252238\n",
      "  0.          0.        ]\n",
      "episode 12-step 55, taking action 1, observation [ 0.03092184  1.1832254   0.13390347 -0.46871075 -0.08296836  0.07778241\n",
      "  0.          0.        ]\n",
      "episode 12-step 56, taking action 2, observation [ 0.03227139  1.1733181   0.13148287 -0.44013458 -0.07952954  0.06877682\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 12-step 57, taking action 1, observation [ 0.03352623  1.1628299   0.11957905 -0.4658448  -0.07368553  0.11688002\n",
      "  0.          0.        ]\n",
      "episode 12-step 58, taking action 3, observation [ 0.03485403  1.1517222   0.12875241 -0.49348298 -0.06970074  0.07969556\n",
      "  0.          0.        ]\n",
      "episode 12-step 59, taking action 3, observation [ 0.0362752   1.140008    0.14046529 -0.52055925 -0.06806672  0.03268053\n",
      "  0.          0.        ]\n",
      "episode 12-step 60, taking action 2, observation [ 0.03757296  1.1288071   0.12893626 -0.49778232 -0.06723572  0.01661999\n",
      "  0.          0.        ]\n",
      "episode 12-step 61, taking action 3, observation [ 0.03894291  1.1169984   0.13799283 -0.5248701  -0.06822504 -0.01978633\n",
      "  0.          0.        ]\n",
      "episode 12-step 62, taking action 0, observation [ 0.04031286  1.1045899   0.13799283 -0.5515368  -0.06921437 -0.01978634\n",
      "  0.          0.        ]\n",
      "episode 12-step 63, taking action 0, observation [ 0.04168291  1.0915812   0.13799283 -0.57820356 -0.0702037  -0.01978633\n",
      "  0.          0.        ]\n",
      "episode 12-step 64, taking action 2, observation [ 0.04323196  1.0791698   0.1553364  -0.55164945 -0.07062495 -0.00842504\n",
      "  0.          0.        ]\n",
      "episode 12-step 65, taking action 0, observation [ 0.04478102  1.066158    0.15533638 -0.5783161  -0.07104619 -0.00842504\n",
      "  0.          0.        ]\n",
      "episode 12-step 66, taking action 2, observation [ 0.04657297  1.0540951   0.17883638 -0.53611344 -0.07068549  0.00721383\n",
      "  0.          0.        ]\n",
      "episode 12-step 67, taking action 1, observation [ 0.04830198  1.0414395   0.17091379 -0.562381   -0.0687315   0.03908011\n",
      "  0.          0.        ]\n",
      "episode 12-step 68, taking action 1, observation [ 0.04995928  1.0281996   0.16192888 -0.58826923 -0.06496211  0.07538761\n",
      "  0.          0.        ]\n",
      "episode 12-step 69, taking action 1, observation [ 0.05152025  1.0143619   0.14984116 -0.6147535  -0.05877266  0.12378933\n",
      "  0.          0.        ]\n",
      "episode 12-step 70, taking action 0, observation [ 0.05308132  0.99992466  0.14984083 -0.64142334 -0.0525832   0.12378903\n",
      "  0.          0.        ]\n",
      "episode 12-step 71, taking action 2, observation [ 0.05467587  0.9861866   0.1532489  -0.61037236 -0.04644929  0.12267842\n",
      "  0.          0.        ]\n",
      "episode 12-step 72, taking action 2, observation [ 0.05619059  0.97332144  0.1458502  -0.5716237  -0.04089508  0.11108442\n",
      "  0.          0.        ]\n",
      "episode 12-step 73, taking action 2, observation [ 0.05787668  0.96114695  0.16235133 -0.54093206 -0.03471117  0.12367809\n",
      "  0.          0.        ]\n",
      "episode 12-step 74, taking action 2, observation [ 0.0596693   0.9494283   0.17261507 -0.5206891  -0.02814784  0.13126653\n",
      "  0.          0.        ]\n",
      "episode 12-step 75, taking action 2, observation [ 0.06156893  0.93781716  0.1828792  -0.51593    -0.02115088  0.13993916\n",
      "  0.          0.        ]\n",
      "episode 12-step 76, taking action 3, observation [ 0.06355944  0.92560214  0.19427617 -0.54283327 -0.01643773  0.09426272\n",
      "  0.          0.        ]\n",
      "episode 12-step 77, taking action 3, observation [ 0.06564369  0.91278666  0.20603788 -0.5695535  -0.01408009  0.047153\n",
      "  0.          0.        ]\n",
      "episode 12-step 78, taking action 0, observation [ 0.06772804  0.8993713   0.20603785 -0.5962207  -0.01172245  0.047153\n",
      "  0.          0.        ]\n",
      "episode 12-step 79, taking action 3, observation [ 0.06989765  0.88535416  0.21673684 -0.6229867  -0.01150774  0.0042943\n",
      "  0.          0.        ]\n",
      "episode 12-step 80, taking action 1, observation [ 0.07200088  0.8707492   0.20841758 -0.64909536 -0.00962464  0.03766203\n",
      "  0.          0.        ]\n",
      "episode 12-step 81, taking action 3, observation [ 0.07418776  0.85555315  0.21891792 -0.6753825  -0.0098429  -0.00436532\n",
      "  0.          0.        ]\n",
      "episode 12-step 82, taking action 2, observation [ 0.07638168  0.84122574  0.21962628 -0.63677156 -0.01007345 -0.00461128\n",
      "  0.          0.        ]\n",
      "episode 12-step 83, taking action 2, observation [ 0.07869234  0.8276403   0.23079434 -0.6037935  -0.00980557  0.00535753\n",
      "  0.          0.        ]\n",
      "episode 12-step 84, taking action 1, observation [ 0.08091535  0.8134486   0.21978863 -0.6307271  -0.00733473  0.04941688\n",
      "  0.          0.        ]\n",
      "episode 12-step 85, taking action 2, observation [ 0.08312168  0.8000745   0.2182366  -0.5943951  -0.00497273  0.04724015\n",
      "  0.          0.        ]\n",
      "episode 12-step 86, taking action 2, observation [ 0.08537836  0.7869687   0.2230567  -0.58247316 -0.00239064  0.05164164\n",
      "  0.          0.        ]\n",
      "episode 12-step 87, taking action 3, observation [ 0.08769493  0.773259    0.23056343 -0.6093202  -0.00131205  0.02157162\n",
      "  0.          0.        ]\n",
      "episode 12-step 88, taking action 3, observation [ 0.09010353  0.75895214  0.24212174 -0.63586205 -0.00254815 -0.02472181\n",
      "  0.          0.        ]\n",
      "episode 12-step 89, taking action 0, observation [ 0.09251223  0.7440452   0.24212174 -0.6625288  -0.00378424 -0.02472184\n",
      "  0.          0.        ]\n",
      "episode 12-step 90, taking action 1, observation [ 0.09485283  0.7285339   0.23358658 -0.6893949  -0.00331133  0.00945806\n",
      "  0.          0.        ]\n",
      "episode 12-step 91, taking action 0, observation [ 0.09719343  0.7124225   0.2335866  -0.71606165 -0.00283842  0.00945806\n",
      "  0.          0.        ]\n",
      "episode 12-step 92, taking action 0, observation [ 0.09953413  0.6957111   0.23358658 -0.7427283  -0.00236551  0.00945806\n",
      "  0.          0.        ]\n",
      "episode 12-step 93, taking action 2, observation [ 0.10189657  0.679525    0.23566976 -0.71938246 -0.00179995  0.01131095\n",
      "  0.          0.        ]\n",
      "episode 12-step 94, taking action 2, observation [ 0.10408411  0.66367507  0.21901162 -0.70444286 -0.0020483  -0.00496706\n",
      "  0.          0.        ]\n",
      "episode 12-step 95, taking action 3, observation [ 0.1063325   0.6472269   0.2266324  -0.7310314  -0.00382277 -0.03548945\n",
      "  0.          0.        ]\n",
      "episode 12-step 96, taking action 0, observation [ 0.10858078  0.6301788   0.22663239 -0.7576983  -0.00559724 -0.03548945\n",
      "  0.          0.        ]\n",
      "episode 12-step 97, taking action 3, observation [ 0.11090612  0.612523    0.23628116 -0.78471917 -0.00930475 -0.07415033\n",
      "  0.          0.        ]\n",
      "episode 12-step 98, taking action 1, observation [ 0.1131588   0.59425896  0.22718307 -0.8117524  -0.0111916  -0.03773684\n",
      "  0.          0.        ]\n",
      "episode 12-step 99, taking action 2, observation [ 0.11536817  0.57604945  0.22307141 -0.80932516 -0.01330165 -0.04220086\n",
      "  0.          0.        ]\n",
      "episode 13-step 0, taking action 3, observation [ 0.0132513   1.4096103   0.67584157 -0.04220037 -0.01700136 -0.18751016\n",
      "  0.          0.        ]\n",
      "episode 13-step 1, taking action 3, observation [ 0.01998081  1.408056    0.6840873  -0.06923936 -0.02802031 -0.22039954\n",
      "  0.          0.        ]\n",
      "episode 13-step 2, taking action 0, observation [ 0.02671061  1.4059033   0.6841207  -0.09591881 -0.03903733 -0.22036107\n",
      "  0.          0.        ]\n",
      "episode 13-step 3, taking action 2, observation [ 0.03330164  1.404717    0.67110354 -0.05308374 -0.05091288 -0.23753309\n",
      "  0.          0.        ]\n",
      "episode 13-step 4, taking action 2, observation [ 0.03997888  1.4038334   0.67944163 -0.03970161 -0.0625223  -0.23220961\n",
      "  0.          0.        ]\n",
      "episode 13-step 5, taking action 0, observation [ 0.04665642  1.4023517   0.67947465 -0.0663811  -0.07413035 -0.23218235\n",
      "  0.          0.        ]\n",
      "episode 13-step 6, taking action 1, observation [ 0.05326834  1.4002775   0.6712072  -0.0927181  -0.08406711 -0.19875345\n",
      "  0.          0.        ]\n",
      "episode 13-step 7, taking action 0, observation [ 0.05988064  1.3976045   0.67123425 -0.11939458 -0.09400301 -0.19873606\n",
      "  0.          0.        ]\n",
      "episode 13-step 8, taking action 3, observation [ 0.06658735  1.3943301   0.6830538  -0.1463453  -0.10630013 -0.24596505\n",
      "  0.          0.        ]\n",
      "episode 13-step 9, taking action 3, observation [ 0.07336416  1.3904433   0.69181794 -0.17381631 -0.12036584 -0.28134012\n",
      "  0.          0.        ]\n",
      "episode 13-step 10, taking action 0, observation [ 0.08014154  1.385959    0.6918573  -0.20050162 -0.13442895 -0.28128824\n",
      "  0.          0.        ]\n",
      "episode 13-step 11, taking action 3, observation [ 0.08701344  1.380862    0.70366096 -0.22809848 -0.1508735  -0.3289214\n",
      "  0.          0.        ]\n",
      "episode 13-step 12, taking action 0, observation [ 0.09388618  1.3751686   0.7037051  -0.2547909  -0.16731425 -0.32884496\n",
      "  0.          0.        ]\n",
      "episode 13-step 13, taking action 2, observation [ 0.10084572  1.3703415   0.7126933  -0.21650532 -0.18411827 -0.3361116\n",
      "  0.          0.        ]\n",
      "episode 13-step 14, taking action 3, observation [ 0.10787229  1.3648973   0.7210479  -0.24436514 -0.2026373  -0.37041438\n",
      "  0.          0.        ]\n",
      "episode 13-step 15, taking action 0, observation [ 0.11490011  1.3588573   0.7210951  -0.27106002 -0.22115263 -0.37033996\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 13-step 16, taking action 1, observation [ 0.12183399  1.3522413   0.70923126 -0.29650036 -0.23723263 -0.3216291\n",
      "  0.          0.        ]\n",
      "episode 13-step 17, taking action 2, observation [ 0.1288455   1.3463111   0.7175196  -0.26628718 -0.25390157 -0.3334095\n",
      "  0.          0.        ]\n",
      "episode 13-step 18, taking action 2, observation [ 0.13617173  1.3410282   0.7484728  -0.23762527 -0.27012104 -0.3244192\n",
      "  0.          0.        ]\n",
      "episode 13-step 19, taking action 1, observation [ 0.1434103   1.3351766   0.7373882  -0.26262626 -0.28402025 -0.27800888\n",
      "  0.          0.        ]\n",
      "episode 13-step 20, taking action 0, observation [ 0.15064983  1.3287272   0.73742026 -0.28931215 -0.29791802 -0.27798036\n",
      "  0.          0.        ]\n",
      "episode 13-step 21, taking action 1, observation [ 0.15783043  1.3217041   0.72993755 -0.31461638 -0.31022024 -0.24606657\n",
      "  0.          0.        ]\n",
      "episode 13-step 22, taking action 0, observation [ 0.1650118   1.3140829   0.7299636  -0.34130394 -0.3225212  -0.24604125\n",
      "  0.          0.        ]\n",
      "episode 13-step 23, taking action 1, observation [ 0.17211457  1.3058965   0.72002536 -0.36604315 -0.33269343 -0.20346236\n",
      "  0.          0.        ]\n",
      "episode 13-step 24, taking action 2, observation [ 0.17938499  1.2985191   0.73776776 -0.33039293 -0.34392563 -0.22465162\n",
      "  0.          0.        ]\n",
      "episode 13-step 25, taking action 3, observation [ 0.18672657  1.2904972   0.7461894  -0.35945705 -0.3565818  -0.25312358\n",
      "  0.          0.        ]\n",
      "episode 13-step 26, taking action 1, observation [ 0.19397935  1.2819142   0.7350185  -0.3839122  -0.3668498  -0.20536005\n",
      "  0.          0.        ]\n",
      "episode 13-step 27, taking action 2, observation [ 0.201618    1.2741486   0.77346224 -0.34763592 -0.37702006 -0.2034051\n",
      "  0.          0.        ]\n",
      "episode 13-step 28, taking action 3, observation [ 0.20934486  1.2657324   0.78456956 -0.3772334  -0.3896657  -0.25291288\n",
      "  0.          0.        ]\n",
      "episode 13-step 29, taking action 3, observation [ 0.21715327  1.2566643   0.7948455  -0.40693754 -0.40465045 -0.2996952\n",
      "  0.          0.        ]\n",
      "episode 13-step 30, taking action 1, observation [ 0.22487536  1.2470508   0.78383815 -0.43063673 -0.41716683 -0.25032768\n",
      "  0.          0.        ]\n",
      "episode 13-step 31, taking action 0, observation [ 0.23259811  1.236839    0.7838303  -0.45731547 -0.42968312 -0.2503252\n",
      "  0.          0.        ]\n",
      "episode 13-step 32, taking action 3, observation [ 0.2404087   1.22597     0.7948898  -0.48737034 -0.44474548 -0.30124703\n",
      "  0.          0.        ]\n",
      "episode 13-step 33, taking action 3, observation [ 0.24828215  1.2144583   0.80274266 -0.5166172  -0.46164596 -0.33800974\n",
      "  0.          0.        ]\n",
      "episode 13-step 34, taking action 1, observation [ 0.25608557  1.2024087   0.7936568  -0.5400203  -0.47636694 -0.29441923\n",
      "  0.          0.        ]\n",
      "episode 13-step 35, taking action 2, observation [ 0.26406288  1.1903338   0.81103575 -0.54130936 -0.49118534 -0.2963681\n",
      "  0.          0.        ]\n",
      "episode 13-step 36, taking action 0, observation [ 0.27204123  1.1776615   0.8110229  -0.5679923  -0.5060035  -0.29636362\n",
      "  0.          0.        ]\n",
      "episode 13-step 37, taking action 0, observation [ 0.28002053  1.1643919   0.8110096  -0.59467524 -0.52082145 -0.29635912\n",
      "  0.          0.        ]\n",
      "episode 13-step 38, taking action 3, observation [ 0.28805703  1.1504773   0.8181545  -0.62406445 -0.5373913  -0.33139703\n",
      "  0.          0.        ]\n",
      "episode 13-step 39, taking action 2, observation [ 0.2962718   1.1364355   0.83586437 -0.6298907  -0.55399925 -0.3321583\n",
      "  0.          0.        ]\n",
      "episode 13-step 40, taking action 1, observation [ 0.30443716  1.1218435   0.8293897  -0.65391624 -0.5689785  -0.29958487\n",
      "  0.          0.        ]\n",
      "episode 13-step 41, taking action 0, observation [ 0.31260356  1.1066538   0.8293747  -0.68059886 -0.5839575  -0.29958025\n",
      "  0.          0.        ]\n",
      "episode 13-step 42, taking action 2, observation [ 0.32117376  1.0914273   0.8691087  -0.6821397  -0.59833664 -0.28758258\n",
      "  0.          0.        ]\n",
      "episode 13-step 43, taking action 2, observation [ 0.33031282  1.0763934   0.92522746 -0.67339206 -0.6119183  -0.27163306\n",
      "  0.          0.        ]\n",
      "episode 13-step 44, taking action 1, observation [ 0.33937865  1.0608475   0.91564596 -0.6952043  -0.6228751  -0.21913603\n",
      "  0.          0.        ]\n",
      "episode 13-step 45, taking action 2, observation [ 0.34884197  1.0459418   0.9559053  -0.6671217  -0.6345696  -0.23389038\n",
      "  0.          0.        ]\n",
      "episode 13-step 46, taking action 3, observation [ 0.35837096  1.0303564   0.9643242  -0.6983646  -0.64864624 -0.2815327\n",
      "  0.          0.        ]\n",
      "episode 13-step 47, taking action 2, observation [ 0.3683362   1.0154204   1.008414   -0.66991204 -0.6634411  -0.29589885\n",
      "  0.          0.        ]\n",
      "episode 13-step 48, taking action 2, observation [ 0.37848777  1.0005904   1.0274305  -0.6655895  -0.67889756 -0.30912882\n",
      "  0.          0.        ]\n",
      "episode 13-step 49, taking action 0, observation [ 0.3886408   0.9851631   1.0274117  -0.69227195 -0.69435376 -0.3091237\n",
      "  0.          0.        ]\n",
      "episode 13-step 50, taking action 0, observation [ 0.39879504  0.9691381   1.0273926  -0.7189541  -0.70980966 -0.30911863\n",
      "  0.          0.        ]\n",
      "episode 13-step 51, taking action 1, observation [ 0.40889606  0.95258075  1.0203803  -0.74184805 -0.7232711  -0.26922876\n",
      "  0.          0.        ]\n",
      "episode 13-step 52, taking action 0, observation [ 0.41899806  0.93542504  1.0203651  -0.76852626 -0.73673236 -0.2692254\n",
      "  0.          0.        ]\n",
      "episode 13-step 53, taking action 0, observation [ 0.42910117  0.91767126  1.0203497  -0.79520434 -0.7501935  -0.26922205\n",
      "  0.          0.        ]\n",
      "episode 13-step 54, taking action 2, observation [ 0.43951386  0.89970934  1.0510299  -0.8044392  -0.76343066 -0.26474407\n",
      "  0.          0.        ]\n",
      "episode 13-step 55, taking action 1, observation [ 0.44987735  0.88120764  1.0446686  -0.8276552  -0.77484626 -0.22831245\n",
      "  0.          0.        ]\n",
      "episode 13-step 56, taking action 3, observation [ 0.4602968   0.8620105   1.0519488  -0.85982436 -0.7887572  -0.27821878\n",
      "  0.          0.        ]\n",
      "episode 13-step 57, taking action 2, observation [ 0.4709792   0.8428501   1.0784705  -0.858547   -0.8032038  -0.28893283\n",
      "  0.          0.        ]\n",
      "episode 13-step 58, taking action 2, observation [ 0.48219556  0.8241112   1.1321114  -0.840169   -0.81820196 -0.29996237\n",
      "  0.          0.        ]\n",
      "episode 13-step 59, taking action 2, observation [ 0.4939619   0.80528945  1.1865467  -0.84368193 -0.8326251  -0.28846297\n",
      "  0.          0.        ]\n",
      "episode 13-step 60, taking action 0, observation [ 0.5057295   0.78586954  1.1865274  -0.87036043 -0.84704804 -0.28845882\n",
      "  0.          0.        ]\n",
      "episode 13-step 61, taking action 3, observation [ 0.5175422   0.7657661   1.1922958  -0.9019306  -0.8636226  -0.33149117\n",
      "  0.          0.        ]\n",
      "episode 13-step 62, taking action 2, observation [ 0.5299759   0.74612045  1.2545732  -0.88202435 -0.8808104  -0.34375566\n",
      "  0.          0.        ]\n",
      "episode 13-step 63, taking action 0, observation [ 0.5424116   0.7258773   1.2545446  -0.9087069  -0.89799786 -0.34374863\n",
      "  0.          0.        ]\n",
      "episode 13-step 64, taking action 0, observation [ 0.55484927  0.7050366   1.2545156  -0.9353892  -0.9151849  -0.3437416\n",
      "  0.          0.        ]\n",
      "episode 13-step 65, taking action 1, observation [ 0.5672439   0.6836768   1.248694   -0.9574432  -0.93027973 -0.30189663\n",
      "  0.          0.        ]\n",
      "episode 13-step 66, taking action 2, observation [ 0.58033514  0.66217077  1.3177121  -0.9636052  -0.94457036 -0.28581378\n",
      "  0.          0.        ]\n",
      "episode 13-step 67, taking action 0, observation [ 0.59342784  0.6400662   1.3176912  -0.9902821  -0.9588609  -0.28580981\n",
      "  0.          0.        ]\n",
      "episode 13-step 68, taking action 2, observation [ 0.6070803   0.618116    1.3737365  -0.98372746 -0.9735681  -0.29414505\n",
      "  0.          0.        ]\n",
      "episode 13-step 69, taking action 0, observation [ 0.6207344   0.5955676   1.373714   -1.0104045  -0.9882751  -0.29414067\n",
      "  0.          0.        ]\n",
      "episode 13-step 70, taking action 3, observation [ 0.63442266  0.57234794  1.3779469  -1.0413411  -1.0047935  -0.33036837\n",
      "  0.          0.        ]\n",
      "episode 13-step 71, taking action 1, observation [ 0.6480808   0.5486023   1.3737228  -1.0637923  -1.0195142  -0.29441258\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 13-step 72, taking action 3, observation [ 0.6617838   0.524139    1.3795149  -1.0974109  -1.0370772  -0.35126078\n",
      "  0.          0.        ]\n",
      "episode 13-step 73, taking action 0, observation [ 0.67548925  0.49907786  1.3794817  -1.1240909  -1.0546398  -0.35125324\n",
      "  0.          0.        ]\n",
      "episode 13-step 74, taking action 3, observation [ 0.68923914  0.4732989   1.3850211  -1.1577859  -1.0750561  -0.40832558\n",
      "  0.          0.        ]\n",
      "episode 13-step 75, taking action 3, observation [ 0.70302564  0.446844    1.3892362  -1.1891342  -1.097426   -0.4474004\n",
      "  0.          0.        ]\n",
      "episode 13-step 76, taking action 1, observation [ 0.7167746   0.419911    1.3837366  -1.2088071  -1.1169424  -0.39032578\n",
      "  0.          0.        ]\n",
      "episode 13-step 77, taking action 2, observation [ 0.7310087   0.39273527  1.4319402  -1.2197105  -1.1364641  -0.3904338\n",
      "  0.          0.        ]\n",
      "episode 13-step 78, taking action 2, observation [ 0.74555033  0.36544842  1.4626311  -1.2252344  -1.1567541  -0.40580082\n",
      "  0.          0.        ]\n",
      "episode 13-step 79, taking action 3, observation [ 0.7601305   0.33747405  1.4669651  -1.2572726  -1.1792473  -0.44986218\n",
      "  0.          0.        ]\n",
      "episode 13-step 80, taking action 3, observation [ 0.7747372   0.30881238  1.469908   -1.2892594  -1.2038192  -0.4914388\n",
      "  0.          0.        ]\n",
      "episode 13-step 81, taking action 1, observation [ 0.7893133   0.27967715  1.4652815  -1.3085959  -1.2254741  -0.43309826\n",
      "  0.          0.        ]\n",
      "episode 13-step 82, taking action 3, observation [ 0.80391866  0.24982804  1.4685991  -1.3421417  -1.2497951  -0.48641816\n",
      "  0.          0.        ]\n",
      "episode 13-step 83, taking action 1, observation [ 0.818494    0.21950807  1.464153   -1.3612739  -1.2711349  -0.42679495\n",
      "  0.          0.        ]\n",
      "episode 13-step 84, taking action 0, observation [ 0.83307326  0.18858977  1.4640983  -1.3879523  -1.2924739  -0.42678156\n",
      "  0.          0.        ]\n",
      "episode 13-step 85, taking action 0, observation [ 0.84765625  0.15707317  1.4640431  -1.4146298  -1.3138124  -0.42676815\n",
      "  0.          0.        ]\n",
      "episode 13-step 86, taking action 2, observation [ 0.86313516  0.12536581  1.5532167  -1.4232968  -1.3353177  -0.43010607\n",
      "  0.          0.        ]\n",
      "episode 13-step 87, taking action 2, observation [ 0.8789711   0.09314011  1.5884377  -1.4463179  -1.356682   -0.42728406\n",
      "  0.          0.        ]\n",
      "episode 13-step 88, taking action 3, observation [ 0.8948349   0.06020008  1.5912807  -1.4798951  -1.3807136  -0.48063374\n",
      "  1.          0.        ]\n",
      "episode 13-step 89, taking action 0, observation [ 0.90577924  0.05015846  0.53119725  0.16013105 -1.5557928  -0.20449515\n",
      "  1.          0.        ]\n",
      "episode 13-step 90, taking action 2, observation [ 0.91145533  0.05561408  0.5645305   0.2135737  -1.5896138  -0.6670705\n",
      "  1.          0.        ]\n",
      "episode 13-step 91, taking action 3, observation [ 0.9169632   0.06077565  0.55275804  0.20001134 -1.6230674  -0.60494816\n",
      "  1.          0.        ]\n",
      "episode 13-step 92, taking action 0, observation [ 0.922423    0.06541093  0.5440508   0.18740104 -1.6498024  -0.5335791\n",
      "  0.          0.        ]\n",
      "episode 13-step 93, taking action 3, observation [ 0.9276779   0.06987062  0.5260979   0.17809406 -1.6778173  -0.5152901\n",
      "  0.          0.        ]\n",
      "episode 13-step 94, taking action 1, observation [ 0.93289566  0.07399043  0.51945466  0.1679383  -1.700185   -0.4414068\n",
      "  0.          0.        ]\n",
      "episode 13-step 95, taking action 3, observation [ 0.9379236   0.07790481  0.499784    0.15964346 -1.7216258  -0.42802987\n",
      "  0.          0.        ]\n",
      "episode 13-step 96, taking action 2, observation [ 0.9430834   0.08170353  0.51311326  0.15689176 -1.7395427  -0.35822222\n",
      "  0.          0.        ]\n",
      "episode 13-step 97, taking action 2, observation [ 0.9488287   0.08560596  0.57229984  0.1650857  -1.7521108  -0.2513472\n",
      "  0.          0.        ]\n",
      "episode 13-step 98, taking action 0, observation [ 0.9544178   0.08933473  0.5568669   0.15850504 -1.7629946  -0.21766715\n",
      "  0.          0.        ]\n",
      "episode 13-step 99, taking action 1, observation [ 0.9600302   0.09259924  0.5597151   0.13994446 -1.7707648  -0.15540624\n",
      "  0.          0.        ]\n",
      "episode 14-step 0, taking action 1, observation [ 0.01514139  1.4314404   0.7587742   0.44296235 -0.01510508 -0.12571533\n",
      "  0.          0.        ]\n",
      "episode 14-step 1, taking action 0, observation [ 0.02266579  1.4408097   0.75879425  0.41634494 -0.02138681 -0.12564644\n",
      "  0.          0.        ]\n",
      "episode 14-step 2, taking action 0, observation [ 0.03019037  1.4495796   0.7588129   0.38967177 -0.02766728 -0.12562117\n",
      "  0.          0.        ]\n",
      "episode 14-step 3, taking action 0, observation [ 0.03771525  1.4577501   0.7588315   0.3629998  -0.03394675 -0.12560132\n",
      "  0.          0.        ]\n",
      "episode 14-step 4, taking action 3, observation [ 0.04530888  1.4653107   0.76747215  0.335821   -0.04195981 -0.16027603\n",
      "  0.          0.        ]\n",
      "episode 14-step 5, taking action 3, observation [ 0.0529748   1.4722788   0.7764991   0.30938882 -0.0517705  -0.1962323\n",
      "  0.          0.        ]\n",
      "episode 14-step 6, taking action 2, observation [ 0.06068707  1.4802034   0.781165    0.3518354  -0.06161493 -0.19690666\n",
      "  0.          0.        ]\n",
      "episode 14-step 7, taking action 1, observation [ 0.06832428  1.4875348   0.77173805  0.3254837  -0.06955849 -0.15888539\n",
      "  0.          0.        ]\n",
      "episode 14-step 8, taking action 0, observation [ 0.07596169  1.4942671   0.7717601   0.2988161  -0.07750165 -0.15887764\n",
      "  0.          0.        ]\n",
      "episode 14-step 9, taking action 2, observation [ 0.08363266  1.5018274   0.7752944   0.3355754  -0.08563466 -0.16267559\n",
      "  0.          0.        ]\n",
      "episode 14-step 10, taking action 1, observation [ 0.09121589  1.5088078   0.764253    0.30989277 -0.0915243  -0.11780356\n",
      "  0.          0.        ]\n",
      "episode 14-step 11, taking action 0, observation [ 0.09879923  1.5151889   0.76426923  0.28322652 -0.0974141  -0.11780678\n",
      "  0.          0.        ]\n",
      "episode 14-step 12, taking action 2, observation [ 0.10629072  1.5222156   0.75589406  0.31184015 -0.10411515 -0.13403346\n",
      "  0.          0.        ]\n",
      "episode 14-step 13, taking action 3, observation [ 0.11384859  1.5286407   0.7641979   0.28495568 -0.11247398 -0.1671916\n",
      "  0.          0.        ]\n",
      "episode 14-step 14, taking action 0, observation [ 0.12140694  1.5344667   0.7642225   0.25828493 -0.12083119 -0.1671592\n",
      "  0.          0.        ]\n",
      "episode 14-step 15, taking action 3, observation [ 0.12903424  1.5396891   0.77284867  0.23125456 -0.13091514 -0.20169744\n",
      "  0.          0.        ]\n",
      "episode 14-step 16, taking action 0, observation [ 0.136662    1.5443127   0.77287686  0.20457573 -0.14099678 -0.2016511\n",
      "  0.          0.        ]\n",
      "episode 14-step 17, taking action 2, observation [ 0.14426088  1.5495044   0.77062654  0.22969638 -0.15172352 -0.21455416\n",
      "  0.          0.        ]\n",
      "episode 14-step 18, taking action 3, observation [ 0.15193272  1.5540761   0.77976465  0.20184928 -0.16431883 -0.2519288\n",
      "  0.          0.        ]\n",
      "episode 14-step 19, taking action 1, observation [ 0.15953283  1.558071    0.7707025   0.17634225 -0.17504117 -0.21446581\n",
      "  0.          0.        ]\n",
      "episode 14-step 20, taking action 3, observation [ 0.16722803  1.5614432   0.782603    0.14827426 -0.18819678 -0.2631356\n",
      "  0.          0.        ]\n",
      "episode 14-step 21, taking action 1, observation [ 0.17482853  1.5642436   0.7706507   0.12307473 -0.1988849  -0.2137814\n",
      "  0.          0.        ]\n",
      "episode 14-step 22, taking action 0, observation [ 0.1824294   1.5664451   0.77067566  0.09639288 -0.2095729  -0.21377876\n",
      "  0.          0.        ]\n",
      "episode 14-step 23, taking action 1, observation [ 0.18993731  1.5680829   0.7589211   0.07162303 -0.21779996 -0.16455606\n",
      "  0.          0.        ]\n",
      "episode 14-step 24, taking action 3, observation [ 0.19750252  1.5690911   0.7667119   0.04326288 -0.22812742 -0.20655552\n",
      "  0.          0.        ]\n",
      "episode 14-step 25, taking action 3, observation [ 0.20515642  1.5694585   0.7773338   0.01443167 -0.24024956 -0.24244304\n",
      "  0.          0.        ]\n",
      "episode 14-step 26, taking action 3, observation [ 0.21288443  1.5691987   0.78662485 -0.0138683  -0.25432545 -0.28151768\n",
      "  0.          0.        ]\n",
      "episode 14-step 27, taking action 0, observation [ 0.22061281  1.5683414   0.7866188  -0.04055114 -0.26840115 -0.28151378\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 14-step 28, taking action 3, observation [ 0.22842398  1.5668623   0.79692537 -0.06871498 -0.28461617 -0.3243005\n",
      "  0.          0.        ]\n",
      "episode 14-step 29, taking action 0, observation [ 0.23623581  1.5647867   0.79691637 -0.09540293 -0.3008309  -0.3242945\n",
      "  0.          0.        ]\n",
      "episode 14-step 30, taking action 3, observation [ 0.24413642  1.5620801   0.80796003 -0.12410183 -0.31938443 -0.3710703\n",
      "  0.          0.        ]\n",
      "episode 14-step 31, taking action 1, observation [ 0.25197172  1.5588098   0.7996014  -0.14898974 -0.33613652 -0.3350415\n",
      "  0.          0.        ]\n",
      "episode 14-step 32, taking action 0, observation [ 0.25980777  1.5549428   0.79959    -0.17567879 -0.3528883  -0.3350351\n",
      "  0.          0.        ]\n",
      "episode 14-step 33, taking action 2, observation [ 0.2679178   1.551791    0.8270731  -0.14412323 -0.36985296 -0.3392935\n",
      "  0.          0.        ]\n",
      "episode 14-step 34, taking action 1, observation [ 0.27595282  1.5480922   0.81744325 -0.16806248 -0.38464588 -0.2958585\n",
      "  0.          0.        ]\n",
      "episode 14-step 35, taking action 1, observation [ 0.28391847  1.5438311   0.8086082  -0.19268915 -0.39750904 -0.2572631\n",
      "  0.          0.        ]\n",
      "episode 14-step 36, taking action 3, observation [ 0.29197073  1.5389141   0.8195407  -0.22261693 -0.41286734 -0.30716604\n",
      "  0.          0.        ]\n",
      "episode 14-step 37, taking action 1, observation [ 0.2999401   1.5334655   0.8088214  -0.2456813  -0.42571217 -0.2568966\n",
      "  0.          0.        ]\n",
      "episode 14-step 38, taking action 1, observation [ 0.3078332   1.5274678   0.7991056  -0.26956213 -0.43634987 -0.21275418\n",
      "  0.          0.        ]\n",
      "episode 14-step 39, taking action 1, observation [ 0.3156418   1.5209188   0.7884343  -0.2934429  -0.444606   -0.16512248\n",
      "  0.          0.        ]\n",
      "episode 14-step 40, taking action 3, observation [ 0.32351056  1.5137408   0.7959137  -0.32192233 -0.45450824 -0.1980448\n",
      "  0.          0.        ]\n",
      "episode 14-step 41, taking action 1, observation [ 0.33129683  1.506021    0.78542256 -0.34533623 -0.46198314 -0.14949805\n",
      "  0.          0.        ]\n",
      "episode 14-step 42, taking action 2, observation [ 0.33911467  1.498306    0.7891772  -0.34536967 -0.47015864 -0.16350962\n",
      "  0.          0.        ]\n",
      "episode 14-step 43, taking action 1, observation [ 0.3468481   1.4900486   0.7784904  -0.3687484  -0.47586358 -0.11409827\n",
      "  0.          0.        ]\n",
      "episode 14-step 44, taking action 1, observation [ 0.3545267   1.4812307   0.77152896 -0.39317432 -0.47994152 -0.08155895\n",
      "  0.          0.        ]\n",
      "episode 14-step 45, taking action 3, observation [ 0.36227712  1.4717451   0.78075254 -0.42358366 -0.48631105 -0.12738988\n",
      "  0.          0.        ]\n",
      "episode 14-step 46, taking action 1, observation [ 0.36996287  1.4617172   0.77246106 -0.44705838 -0.49064875 -0.08675385\n",
      "  0.          0.        ]\n",
      "episode 14-step 47, taking action 0, observation [ 0.37764874  1.4510895   0.77245986 -0.47372645 -0.49498641 -0.08675372\n",
      "  0.          0.        ]\n",
      "episode 14-step 48, taking action 3, observation [ 0.3853937   1.4398122   0.77998036 -0.5032045  -0.50115395 -0.12334891\n",
      "  0.          0.        ]\n",
      "episode 14-step 49, taking action 3, observation [ 0.39321774  1.427872    0.7900267  -0.5334842  -0.50974137 -0.1717484\n",
      "  0.          0.        ]\n",
      "episode 14-step 50, taking action 3, observation [ 0.40112343  1.4152659   0.8003505  -0.56394964 -0.52083325 -0.2218376\n",
      "  0.          0.        ]\n",
      "episode 14-step 51, taking action 2, observation [ 0.40912715  1.4027829   0.81071043 -0.5588112  -0.5326406  -0.23614648\n",
      "  0.          0.        ]\n",
      "episode 14-step 52, taking action 1, observation [ 0.41708413  1.3897548   0.8045702  -0.58254075 -0.5428243  -0.2036748\n",
      "  0.          0.        ]\n",
      "episode 14-step 53, taking action 2, observation [ 0.42529136  1.377023    0.82991266 -0.56959015 -0.55344445 -0.21240155\n",
      "  0.          0.        ]\n",
      "episode 14-step 54, taking action 3, observation [ 0.43356848  1.3636359   0.8386692  -0.59954864 -0.5662092  -0.25529516\n",
      "  0.          0.        ]\n",
      "episode 14-step 55, taking action 3, observation [ 0.441924    1.3495684   0.84865534 -0.63085437 -0.58160126 -0.30784088\n",
      "  0.          0.        ]\n",
      "episode 14-step 56, taking action 0, observation [ 0.45028067  1.3349037   0.84863913 -0.65753776 -0.596993   -0.30783588\n",
      "  0.          0.        ]\n",
      "episode 14-step 57, taking action 2, observation [ 0.45913592  1.3204635   0.89797133 -0.64750105 -0.61190706 -0.2982802\n",
      "  0.          0.        ]\n",
      "episode 14-step 58, taking action 1, observation [ 0.46791697  1.3055034   0.8883047  -0.66973335 -0.62427354 -0.24732979\n",
      "  0.          0.        ]\n",
      "episode 14-step 59, taking action 3, observation [ 0.47676143  1.2898655   0.8964488  -0.70086384 -0.63894856 -0.29349983\n",
      "  0.          0.        ]\n",
      "episode 14-step 60, taking action 3, observation [ 0.4856637  1.2735684  0.9036849 -0.7310859 -0.6555866 -0.3327603\n",
      "  0.         0.       ]\n",
      "episode 14-step 61, taking action 1, observation [ 0.49450475  1.256733    0.8957475  -0.7542951  -0.67016774 -0.2916233\n",
      "  0.          0.        ]\n",
      "episode 14-step 62, taking action 2, observation [ 0.5034531   1.2398934   0.90694714 -0.7549083  -0.6855002  -0.30664936\n",
      "  0.          0.        ]\n",
      "episode 14-step 63, taking action 3, observation [ 0.5124637   1.2223852   0.91471565 -0.7857085  -0.703024   -0.3504764\n",
      "  0.          0.        ]\n",
      "episode 14-step 64, taking action 0, observation [ 0.5214761   1.2042801   0.91469085 -0.81239504 -0.7205475  -0.35046902\n",
      "  0.          0.        ]\n",
      "episode 14-step 65, taking action 0, observation [ 0.5304903   1.1855781   0.9146656  -0.8390812  -0.73807055 -0.35046154\n",
      "  0.          0.        ]\n",
      "episode 14-step 66, taking action 2, observation [ 0.540178    1.1671906   0.98149824 -0.8250717  -0.75518674 -0.3423235\n",
      "  0.          0.        ]\n",
      "episode 14-step 67, taking action 3, observation [ 0.5499299   1.1481172   0.9895525  -0.85686666 -0.7748033  -0.39233184\n",
      "  0.          0.        ]\n",
      "episode 14-step 68, taking action 1, observation [ 0.55961245  1.12854     0.9803767  -0.87812656 -0.7916771  -0.3374766\n",
      "  0.          0.        ]\n",
      "episode 14-step 69, taking action 1, observation [ 0.5692356   1.1084577   0.9724415  -0.8994887  -0.8060079  -0.28661615\n",
      "  0.          0.        ]\n",
      "episode 14-step 70, taking action 2, observation [ 0.5791413   1.0884062   1.0008923  -0.89845395 -0.82083714 -0.296586\n",
      "  0.          0.        ]\n",
      "episode 14-step 71, taking action 2, observation [ 0.5892743   1.068275    1.0237744  -0.9023234  -0.83612335 -0.30572385\n",
      "  0.          0.        ]\n",
      "episode 14-step 72, taking action 3, observation [ 0.5994525   1.047479    1.0293639  -0.93291634 -0.8532587  -0.34270757\n",
      "  0.          0.        ]\n",
      "episode 14-step 73, taking action 3, observation [ 0.6096777   1.0260205   1.0350469  -0.9634378  -0.87223727 -0.37957057\n",
      "  0.          0.        ]\n",
      "episode 14-step 74, taking action 0, observation [ 0.6199053   1.0039651   1.0350124  -0.99012417 -0.89121526 -0.37956116\n",
      "  0.          0.        ]\n",
      "episode 14-step 75, taking action 2, observation [ 0.63039875  0.98197645  1.0618203  -0.9877489  -0.9110191  -0.39607608\n",
      "  0.          0.        ]\n",
      "episode 14-step 76, taking action 3, observation [ 0.640938    0.95929474  1.0675229  -1.0200053  -0.93318874 -0.44339353\n",
      "  0.          0.        ]\n",
      "episode 14-step 77, taking action 3, observation [ 0.6515287   0.93593115  1.0736096  -1.0517433  -0.95761955 -0.48861796\n",
      "  0.          0.        ]\n",
      "episode 14-step 78, taking action 1, observation [ 0.66208917  0.9120572   1.0689126  -1.0735085  -0.97999257 -0.44746074\n",
      "  0.          0.        ]\n",
      "episode 14-step 79, taking action 2, observation [ 0.6732788   0.88794225  1.1310194  -1.0840108  -1.0016638  -0.43342605\n",
      "  0.          0.        ]\n",
      "episode 14-step 80, taking action 2, observation [ 0.68523824  0.8636829   1.2071912  -1.0901788  -1.0226     -0.41872534\n",
      "  0.          0.        ]\n",
      "episode 14-step 81, taking action 0, observation [ 0.6972008  0.8388265  1.2071443 -1.116865  -1.0435356 -0.4187127\n",
      "  0.         0.       ]\n",
      "episode 14-step 82, taking action 3, observation [ 0.7092037   0.81327397  1.2119747  -1.1493642  -1.0668586  -0.46646005\n",
      "  0.          0.        ]\n",
      "episode 14-step 83, taking action 2, observation [ 0.72165394  0.78768843  1.256494   -1.1513256  -1.0907121  -0.4770678\n",
      "  0.          0.        ]\n",
      "episode 14-step 84, taking action 2, observation [ 0.73502845  0.76198643  1.3480779  -1.1562873  -1.113914   -0.46403784\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 14-step 85, taking action 2, observation [ 0.74930346  0.7364512   1.4377558  -1.1492108  -1.1373994  -0.4697086\n",
      "  0.          0.        ]\n",
      "episode 14-step 86, taking action 2, observation [ 0.76452714  0.71080357  1.5319531  -1.1540608  -1.160406   -0.46013418\n",
      "  0.          0.        ]\n",
      "episode 14-step 87, taking action 3, observation [ 0.7797886   0.68445426  1.5361836  -1.1869438  -1.1858994  -0.50987047\n",
      "  0.          0.        ]\n",
      "episode 14-step 88, taking action 0, observation [ 0.7950554   0.65750813  1.5361078  -1.2136315  -1.2113917  -0.50984764\n",
      "  0.          0.        ]\n",
      "episode 14-step 89, taking action 3, observation [ 0.81035805  0.62987554  1.5397937  -1.2456692  -1.2390381  -0.5529273\n",
      "  0.          0.        ]\n",
      "episode 14-step 90, taking action 3, observation [ 0.82569236  0.60152763  1.5430388  -1.279387   -1.2694086  -0.60741097\n",
      "  0.          0.        ]\n",
      "episode 14-step 91, taking action 3, observation [ 0.8410538  0.5724545  1.5456634 -1.3136989 -1.3026865 -0.6655563\n",
      "  0.         0.       ]\n",
      "episode 14-step 92, taking action 1, observation [ 0.85640013  0.5428975   1.5424466  -1.3336968  -1.3333622  -0.61351496\n",
      "  0.          0.        ]\n",
      "episode 14-step 93, taking action 3, observation [ 0.87177145  0.5126155   1.5446339  -1.3679786  -1.3669302  -0.67136055\n",
      "  0.          0.        ]\n",
      "episode 14-step 94, taking action 0, observation [ 0.8871525   0.48173648  1.5444949  -1.3946662  -1.4004956  -0.67130846\n",
      "  0.          0.        ]\n",
      "episode 14-step 95, taking action 0, observation [ 0.90254366  0.45026     1.5443548  -1.4213505  -1.4340584  -0.6712564\n",
      "  0.          0.        ]\n",
      "episode 14-step 96, taking action 0, observation [ 0.9179443   0.41818544  1.5442141  -1.4480318  -1.4676187  -0.6712043\n",
      "  0.          0.        ]\n",
      "episode 14-step 97, taking action 2, observation [ 0.93369865  0.38571334  1.5784543  -1.4660858  -1.5016285  -0.6801968\n",
      "  0.          0.        ]\n",
      "episode 14-step 98, taking action 0, observation [ 0.94946325  0.35264212  1.5783086  -1.492761   -1.5356356  -0.68014264\n",
      "  0.          0.        ]\n",
      "episode 14-step 99, taking action 2, observation [ 0.9660841   0.3189243   1.6627859  -1.5213395  -1.5693574  -0.67443675\n",
      "  0.          0.        ]\n",
      "episode 15-step 0, taking action 3, observation [-0.00827513  1.3973558  -0.41355747 -0.3141383   0.0079      0.06121917\n",
      "  0.          0.        ]\n",
      "episode 15-step 1, taking action 0, observation [-0.01237993  1.3896877  -0.41356677 -0.34082478  0.01095893  0.06118419\n",
      "  0.          0.        ]\n",
      "episode 15-step 2, taking action 3, observation [-0.01642408  1.3814186  -0.4059673  -0.36752433  0.01249147  0.03065366\n",
      "  0.          0.        ]\n",
      "episode 15-step 3, taking action 3, observation [-0.02037172  1.3725451  -0.39385343 -0.39436764  0.01159547 -0.01792193\n",
      "  0.          0.        ]\n",
      "episode 15-step 4, taking action 1, observation [-0.02439041  1.36308    -0.40276456 -0.42067748  0.01248614  0.01781494\n",
      "  0.          0.        ]\n",
      "episode 15-step 5, taking action 0, observation [-0.028409    1.353015   -0.40276837 -0.44734603  0.01337536  0.017786\n",
      "  0.          0.        ]\n",
      "episode 15-step 6, taking action 3, observation [-0.03236637  1.3423407  -0.3950751  -0.47440365  0.01272402 -0.01302808\n",
      "  0.          0.        ]\n",
      "episode 15-step 7, taking action 2, observation [-0.03616695  1.3321533  -0.38018823 -0.45278114  0.01286294  0.00277827\n",
      "  0.          0.        ]\n",
      "episode 15-step 8, taking action 1, observation [-0.04005175  1.3213649  -0.39076632 -0.4795093   0.01512137  0.04517267\n",
      "  0.          0.        ]\n",
      "episode 15-step 9, taking action 2, observation [-0.04410248  1.3111504  -0.4066185  -0.4539841   0.01664831  0.03054146\n",
      "  0.          0.        ]\n",
      "episode 15-step 10, taking action 0, observation [-0.04815311  1.3003359  -0.40662208 -0.48066208  0.01817608  0.03055866\n",
      "  0.          0.        ]\n",
      "episode 15-step 11, taking action 2, observation [-0.05219221  1.2898333  -0.40556106 -0.46681085  0.0198107   0.03269576\n",
      "  0.          0.        ]\n",
      "episode 15-step 12, taking action 0, observation [-0.05623131  1.2787302  -0.4055656  -0.49348694  0.02144503  0.03268944\n",
      "  0.          0.        ]\n",
      "episode 15-step 13, taking action 2, observation [-0.06040573  1.268223   -0.4185248  -0.46700248  0.0225226   0.02155346\n",
      "  0.          0.        ]\n",
      "episode 15-step 14, taking action 2, observation [-0.06477394  1.2586069  -0.43709436 -0.42738634  0.0227939   0.00542674\n",
      "  0.          0.        ]\n",
      "episode 15-step 15, taking action 3, observation [-0.06906214  1.2483975  -0.4270595  -0.45372546  0.0210524  -0.03483318\n",
      "  0.          0.        ]\n",
      "episode 15-step 16, taking action 3, observation [-0.07328586  1.2375872  -0.41898355 -0.48041165  0.01769518 -0.06715092\n",
      "  0.          0.        ]\n",
      "episode 15-step 17, taking action 3, observation [-0.07742043  1.2261856  -0.40779257 -0.50668836  0.01209591 -0.11199562\n",
      "  0.          0.        ]\n",
      "episode 15-step 18, taking action 3, observation [-0.08146648  1.2141858  -0.3966879  -0.5332824   0.00427632 -0.15640606\n",
      "  0.          0.        ]\n",
      "episode 15-step 19, taking action 3, observation [-0.08544655  1.2015836  -0.38843775 -0.56009483 -0.00518961 -0.18933578\n",
      "  0.          0.        ]\n",
      "episode 15-step 20, taking action 3, observation [-0.08936186  1.1883911  -0.38031644 -0.5864087  -0.01627466 -0.22172067\n",
      "  0.          0.        ]\n",
      "episode 15-step 21, taking action 3, observation [-0.09320259  1.1745944  -0.3709793  -0.61337644 -0.02922337 -0.25899762\n",
      "  0.          0.        ]\n",
      "episode 15-step 22, taking action 3, observation [-0.09698276  1.1602082  -0.3634094  -0.6397377  -0.04367426 -0.28904396\n",
      "  0.          0.        ]\n",
      "episode 15-step 23, taking action 0, observation [-0.10076246  1.1452246  -0.36336863 -0.6664249  -0.05812262 -0.28899306\n",
      "  0.          0.        ]\n",
      "episode 15-step 24, taking action 0, observation [-0.10454254  1.1296222  -0.36341047 -0.6940647  -0.07257018 -0.28895113\n",
      "  0.          0.        ]\n",
      "episode 15-step 25, taking action 3, observation [-0.1082591   1.1134213  -0.35546356 -0.72089875 -0.08860923 -0.320781\n",
      "  0.          0.        ]\n",
      "episode 15-step 26, taking action 0, observation [-0.11197548  1.0966239  -0.3554662  -0.74758685 -0.10464801 -0.3207752\n",
      "  0.          0.        ]\n",
      "episode 15-step 27, taking action 0, observation [-0.11569166  1.07923    -0.3554693  -0.7742749  -0.12068648 -0.32076955\n",
      "  0.          0.        ]\n",
      "episode 15-step 28, taking action 3, observation [-0.11933003  1.0612307  -0.34575528 -0.80152893 -0.13868232 -0.35991672\n",
      "  0.          0.        ]\n",
      "episode 15-step 29, taking action 2, observation [-0.1230277   1.0432177  -0.35121816 -0.8024086  -0.15720013 -0.37035614\n",
      "  0.          0.        ]\n",
      "episode 15-step 30, taking action 0, observation [-0.126725    1.024609   -0.35122448 -0.8291036  -0.17571752 -0.3703477\n",
      "  0.          0.        ]\n",
      "episode 15-step 31, taking action 2, observation [-0.13030176  1.0067782  -0.33904058 -0.79479563 -0.19442888 -0.3742272\n",
      "  0.          0.        ]\n",
      "episode 15-step 32, taking action 0, observation [-0.13387784  0.9883521  -0.33904862 -0.8214911  -0.21313979 -0.3742183\n",
      "  0.          0.        ]\n",
      "episode 15-step 33, taking action 0, observation [-0.13745336  0.9693306  -0.33905748 -0.8481865  -0.23185025 -0.37420928\n",
      "  0.          0.        ]\n",
      "episode 15-step 34, taking action 0, observation [-0.14102812  0.9497134  -0.33906713 -0.87488174 -0.25056022 -0.3742002\n",
      "  0.          0.        ]\n",
      "episode 15-step 35, taking action 2, observation [-0.14434166  0.9310148  -0.31303504 -0.8342803  -0.2692608  -0.37401116\n",
      "  0.          0.        ]\n",
      "episode 15-step 36, taking action 2, observation [-0.1473546   0.9126838  -0.28361493 -0.81807685 -0.28741527 -0.36308938\n",
      "  0.          0.        ]\n",
      "episode 15-step 37, taking action 0, observation [-0.1503667   0.8937568  -0.28362623 -0.8447701  -0.30556932 -0.36308116\n",
      "  0.          0.        ]\n",
      "episode 15-step 38, taking action 3, observation [-0.15328893  0.87418514 -0.27236038 -0.8741624  -0.32618392 -0.41229185\n",
      "  0.          0.        ]\n",
      "episode 15-step 39, taking action 1, observation [-0.15628843  0.854061   -0.28230783 -0.8984969  -0.34462744 -0.36887053\n",
      "  0.          0.        ]\n",
      "episode 15-step 40, taking action 0, observation [-0.15928698  0.83334106 -0.28232187 -0.92519057 -0.36307055 -0.36886188\n",
      "  0.          0.        ]\n",
      "episode 15-step 41, taking action 3, observation [-0.16221562  0.81198853 -0.2736563  -0.95399857 -0.38342196 -0.4070279\n",
      "  0.          0.        ]\n",
      "episode 15-step 42, taking action 1, observation [-0.16522026  0.7900994  -0.2835596  -0.9775022  -0.40148506 -0.36126202\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 15-step 43, taking action 3, observation [-0.1681469   0.7675633  -0.27382553 -1.007074   -0.42177027 -0.40570417\n",
      "  0.          0.        ]\n",
      "episode 15-step 44, taking action 3, observation [-0.17100315  0.74437624 -0.26502687 -1.0368552  -0.4441434  -0.44746327\n",
      "  0.          0.        ]\n",
      "episode 15-step 45, taking action 3, observation [-0.17378168  0.7205326  -0.25540048 -1.0670488  -0.46882796 -0.49369135\n",
      "  0.          0.        ]\n",
      "episode 15-step 46, taking action 3, observation [-0.17649956  0.69604844 -0.2480021  -1.0964558  -0.49529934 -0.5294277\n",
      "  0.          0.        ]\n",
      "episode 15-step 47, taking action 2, observation [-0.17895165  0.6720356  -0.22133581 -1.0760862  -0.5222588  -0.53918993\n",
      "  0.          0.        ]\n",
      "episode 15-step 48, taking action 0, observation [-0.18140039  0.6474311  -0.2213802  -1.1028061  -0.549217   -0.539163\n",
      "  0.          0.        ]\n",
      "episode 15-step 49, taking action 1, observation [-0.18392906  0.622301   -0.23192112 -1.1256478  -0.5736169  -0.48799887\n",
      "  0.          0.        ]\n",
      "episode 15-step 50, taking action 1, observation [-0.18653098  0.59665596 -0.24173209 -1.1479042  -0.5954594  -0.43684918\n",
      "  0.          0.        ]\n",
      "episode 15-step 51, taking action 1, observation [-0.1892151   0.5704893  -0.25242248 -1.170319   -0.6146267  -0.3833461\n",
      "  0.          0.        ]\n",
      "episode 15-step 52, taking action 3, observation [-0.19184741  0.5436714  -0.24605992 -1.200155   -0.6355237  -0.41793966\n",
      "  0.          0.        ]\n",
      "episode 15-step 53, taking action 2, observation [-0.19411334  0.5167714  -0.21000826 -1.2038726  -0.65600413 -0.4096089\n",
      "  0.          0.        ]\n",
      "episode 15-step 54, taking action 1, observation [-0.19642982  0.48933414 -0.21679953 -1.2271879  -0.6746328  -0.37257293\n",
      "  0.          0.        ]\n",
      "episode 15-step 55, taking action 1, observation [-0.19880418  0.4613731  -0.2245092  -1.2497131  -0.69107705 -0.32888478\n",
      "  0.          0.        ]\n",
      "episode 15-step 56, taking action 3, observation [-0.20113096  0.4327497  -0.218519   -1.280093   -0.7093386  -0.36523056\n",
      "  0.          0.        ]\n",
      "episode 15-step 57, taking action 2, observation [-0.20304032  0.40384042 -0.17763354 -1.2925915  -0.72676635 -0.34855443\n",
      "  0.          0.        ]\n",
      "episode 15-step 58, taking action 3, observation [-0.20488405  0.37426272 -0.16956967 -1.323495   -0.7464623  -0.39391875\n",
      "  0.          0.        ]\n",
      "episode 15-step 59, taking action 2, observation [-0.20616007  0.34487516 -0.11326678 -1.3151028  -0.7658855  -0.38846427\n",
      "  0.          0.        ]\n",
      "episode 15-step 60, taking action 3, observation [-0.20738463  0.31481302 -0.10689439 -1.3462698  -0.787405   -0.43039066\n",
      "  0.          0.        ]\n",
      "episode 15-step 61, taking action 1, observation [-0.20864716  0.2842179  -0.11222367 -1.3693422  -0.80720735 -0.39604634\n",
      "  0.          0.        ]\n",
      "episode 15-step 62, taking action 0, observation [-0.20990714  0.25302637 -0.11225893 -1.3960319  -0.82700914 -0.3960355\n",
      "  0.          0.        ]\n",
      "episode 15-step 63, taking action 3, observation [-0.21110836  0.2211588  -0.10514355 -1.4274071  -0.84908193 -0.44145554\n",
      "  0.          0.        ]\n",
      "episode 15-step 64, taking action 3, observation [-0.21225563  0.18860736 -0.09860297 -1.4592129  -0.87347424 -0.48784724\n",
      "  0.          0.        ]\n",
      "episode 15-step 65, taking action 1, observation [-0.21343274  0.15553287 -0.1031836  -1.4817861  -0.8960893  -0.45230103\n",
      "  0.          0.        ]\n",
      "episode 15-step 66, taking action 2, observation [-0.21381387  0.12273696 -0.02409955 -1.4695213  -0.9185199  -0.44861302\n",
      "  0.          0.        ]\n",
      "episode 15-step 67, taking action 0, observation [-0.21419144  0.08934518 -0.02414938 -1.496214   -0.9409498  -0.44859752\n",
      "  0.          0.        ]\n",
      "episode 15-step 68, taking action 0, observation [-0.21456537  0.05535723 -0.02420005 -1.522906   -0.9633789  -0.448582\n",
      "  1.          0.        ]\n",
      "episode 15-step 69, taking action 0, observation [-0.21497731  0.0211356  -0.02703292 -1.5420877  -0.9903507  -0.52442515\n",
      "  1.          0.        ]\n",
      "episode 15-step 70, taking action 0, observation [-2.1565600e-01  1.0618067e-03  4.2397719e-02 -1.1075380e+00\n",
      " -1.2536464e+00 -4.9068441e+00  1.0000000e+00  0.0000000e+00]\n",
      "episode 15-step 71, taking action 3, observation [-0.21488638 -0.01201264  0.07144656 -0.9829844  -1.5692095  -5.784882\n",
      "  1.          0.        ]\n",
      "episode 15-step 72, taking action 1, observation [-0.2145681  -0.00511961  0.01296154  0.2896507  -1.8021932  -2.415057\n",
      "  0.          0.        ]\n",
      "episode 15-step 73, taking action 2, observation [-0.2133338   0.00363987  0.09482343  0.29421026 -1.9392693  -2.551454\n",
      "  0.          0.        ]\n",
      "episode 15-step 74, taking action 2, observation [-0.21134415  0.01103068  0.14659569  0.24842228 -2.0676548  -2.5420172\n",
      "  0.          0.        ]\n",
      "episode 15-step 75, taking action 0, observation [-0.20921955  0.01768363  0.14487469  0.22248162 -2.1948137  -2.5397658\n",
      "  0.          0.        ]\n",
      "episode 15-step 76, taking action 1, observation [-0.20694566  0.02367519  0.1477403   0.20060131 -2.3199205  -2.5016441\n",
      "  0.          0.        ]\n",
      "episode 15-step 77, taking action 2, observation [-0.2041111   0.02808875  0.19258119  0.13786642 -2.4444373  -2.490264\n",
      "  0.          0.        ]\n",
      "episode 15-step 78, taking action 1, observation [-0.20112786  0.03184162  0.19957967  0.117488   -2.5662467  -2.4361727\n",
      "  0.          0.        ]\n",
      "episode 15-step 79, taking action 1, observation [-0.19799213  0.03490213  0.20905802  0.09591804 -2.685317   -2.3813713\n",
      "  0.          0.        ]\n",
      "episode 15-step 80, taking action 2, observation [-0.19452581  0.03658305  0.2355161   0.04311652 -2.8045871  -2.3853674\n",
      "  0.          0.        ]\n",
      "episode 15-step 81, taking action 2, observation [-0.19069433  0.03612944  0.26726252 -0.04269775 -2.9237728  -2.3836837\n",
      "  0.          0.        ]\n",
      "episode 15-step 82, taking action 1, observation [-0.18674345  0.03492287  0.27857396 -0.06662621 -3.040408   -2.3326566\n",
      "  0.          0.        ]\n",
      "episode 15-step 83, taking action 1, observation [-0.18268538  0.03295048  0.29039872 -0.09154399 -3.1545026  -2.2818317\n",
      "  0.          0.        ]\n",
      "episode 15-step 84, taking action 2, observation [-0.17854643  0.02925733  0.2982459  -0.15921131 -3.2689147  -2.288178\n",
      "  0.          0.        ]\n",
      "episode 15-step 85, taking action 0, observation [-0.17442341  0.02479299  0.298371   -0.18473016 -3.3832242  -2.2861273\n",
      "  0.          0.        ]\n",
      "episode 15-step 86, taking action 1, observation [-0.17025022  0.01952714  0.30856597 -0.21228322 -3.4953299  -2.242041\n",
      "  0.          0.        ]\n",
      "episode 15-step 87, taking action 3, observation [-0.16618662  0.01353022  0.30034077 -0.23628202 -3.6091857  -2.2770505\n",
      "  0.          0.        ]\n",
      "episode 15-step 88, taking action 1, observation [-0.16248026  0.01567899  0.32853836  0.07196735 -3.655378   -1.0439293\n",
      "  0.          0.        ]\n",
      "episode 15-step 89, taking action 3, observation [-0.15881872  0.01666468  0.32067946  0.0618619  -3.707626   -1.044365\n",
      "  0.          0.        ]\n",
      "episode 15-step 90, taking action 3, observation [-0.15521678  0.01735169  0.31635806  0.05006107 -3.759666   -1.0406573\n",
      "  0.          0.        ]\n",
      "episode 15-step 91, taking action 3, observation [-0.1517067   0.01775037  0.30789518  0.03920141 -3.812802   -1.0626377\n",
      "  0.          0.        ]\n",
      "episode 15-step 92, taking action 0, observation [-0.14820175  0.01783706  0.3102998   0.0261642  -3.864439   -1.0326648\n",
      "  0.          0.        ]\n",
      "episode 15-step 93, taking action 2, observation [-0.145082    0.0176458   0.27612793  0.01350735 -3.912545   -0.9620625\n",
      "  0.          0.        ]\n",
      "episode 15-step 94, taking action 1, observation [-1.4190808e-01  1.7229661e-02  2.8531256e-01  3.1362502e-03\n",
      " -3.9575887e+00 -9.0082073e-01  0.0000000e+00  0.0000000e+00]\n",
      "episode 15-step 95, taking action 2, observation [-0.13897619  0.01555232  0.26213306 -0.05161903 -4.0033355  -0.91493285\n",
      "  0.          0.        ]\n",
      "episode 15-step 96, taking action 2, observation [-0.13641958  0.01470366  0.22661813 -0.01481483 -4.048416   -0.9007061\n",
      "  0.          0.        ]\n",
      "episode 15-step 97, taking action 0, observation [-0.13384953  0.01361569  0.22942385 -0.02385617 -4.093764   -0.90695417\n",
      "  0.          0.        ]\n",
      "episode 15-step 98, taking action 2, observation [-0.13189955  0.01226436  0.1682732  -0.03389631 -4.140632   -0.93731153\n",
      "  0.          0.        ]\n",
      "episode 15-step 99, taking action 3, observation [-0.12999888  0.0105795   0.16369185 -0.04615873 -4.190506   -0.99747753\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 16-step 0, taking action 1, observation [-0.01079979  1.4282694  -0.5516445   0.37269717  0.01412901  0.15816174\n",
      "  0.          0.        ]\n",
      "episode 16-step 1, taking action 2, observation [-0.01614485  1.4375727  -0.5430111   0.4133747   0.02254009  0.16823743\n",
      "  0.          0.        ]\n",
      "episode 16-step 2, taking action 0, observation [-0.0214901   1.4462767  -0.5430356   0.38668644  0.03095005  0.16821492\n",
      "  0.          0.        ]\n",
      "episode 16-step 3, taking action 1, observation [-0.02692738  1.454369   -0.5545684   0.3594006   0.04167283  0.2144754\n",
      "  0.          0.        ]\n",
      "episode 16-step 4, taking action 0, observation [-0.03236513  1.4618627  -0.55460066  0.33272442  0.05239283  0.21441977\n",
      "  0.          0.        ]\n",
      "episode 16-step 5, taking action 2, observation [-0.03794584  1.4703217  -0.56846726  0.37555438  0.06269185  0.20599934\n",
      "  0.          0.        ]\n",
      "episode 16-step 6, taking action 0, observation [-0.04352684  1.4781817  -0.5684955   0.34886503  0.07298957  0.20597307\n",
      "  0.          0.        ]\n",
      "episode 16-step 7, taking action 1, observation [-0.04918833  1.4854465  -0.5785495   0.32223     0.08528875  0.24600637\n",
      "  0.          0.        ]\n",
      "episode 16-step 8, taking action 0, observation [-0.05485029  1.4921131  -0.57858527  0.2955494   0.09758504  0.24594828\n",
      "  0.          0.        ]\n",
      "episode 16-step 9, taking action 1, observation [-0.06057443  1.4981635  -0.58637035  0.26792967  0.11146093  0.27754354\n",
      "  0.          0.        ]\n",
      "episode 16-step 10, taking action 2, observation [-0.06646328  1.5050219  -0.6025437   0.30374536  0.12505439  0.27189422\n",
      "  0.          0.        ]\n",
      "episode 16-step 11, taking action 3, observation [-0.07228775  1.5112953  -0.5944132   0.27776968  0.13698845  0.23870268\n",
      "  0.          0.        ]\n",
      "episode 16-step 12, taking action 2, observation [-0.07807817  1.5184207  -0.5917765   0.3154649   0.14970489  0.2543522\n",
      "  0.          0.        ]\n",
      "episode 16-step 13, taking action 1, observation [-0.08396216  1.5249217  -0.6034982   0.28734002  0.16481575  0.30224472\n",
      "  0.          0.        ]\n",
      "episode 16-step 14, taking action 2, observation [-0.08997355  1.5320793  -0.6162511   0.31637004  0.17998064  0.30332547\n",
      "  0.          0.        ]\n",
      "episode 16-step 15, taking action 3, observation [-0.09590807  1.5386709  -0.60649085  0.29131916  0.19310033  0.26241747\n",
      "  0.          0.        ]\n",
      "episode 16-step 16, taking action 0, observation [-0.10184326  1.5446646  -0.6065241   0.26463786  0.20621881  0.2623933\n",
      "  0.          0.        ]\n",
      "episode 16-step 17, taking action 2, observation [-0.10793428  1.5505764  -0.6217014   0.26093683  0.21895294  0.25470573\n",
      "  0.          0.        ]\n",
      "episode 16-step 18, taking action 0, observation [-0.11402607  1.5558901  -0.6217313   0.23424634  0.2316854   0.25467193\n",
      "  0.          0.        ]\n",
      "episode 16-step 19, taking action 0, observation [-0.12011862  1.5606058  -0.6217628   0.20756148  0.24441567  0.25462812\n",
      "  0.          0.        ]\n",
      "episode 16-step 20, taking action 1, observation [-0.1262761   1.5646906  -0.6299416   0.17912856  0.25888792  0.2894713\n",
      "  0.          0.        ]\n",
      "episode 16-step 21, taking action 2, observation [-0.13257942  1.5695213  -0.64486724  0.2120675   0.27377123  0.29769367\n",
      "  0.          0.        ]\n",
      "episode 16-step 22, taking action 2, observation [-0.13909121  1.5752895  -0.665997    0.25352585  0.2890082   0.30476713\n",
      "  0.          0.        ]\n",
      "episode 16-step 23, taking action 3, observation [-0.14554195  1.5804913  -0.65818745  0.22853369  0.30255324  0.2709247\n",
      "  0.          0.        ]\n",
      "episode 16-step 24, taking action 2, observation [-0.1522131   1.5855556  -0.67965925  0.2224135   0.3155503   0.25994083\n",
      "  0.          0.        ]\n",
      "episode 16-step 25, taking action 0, observation [-0.15888481  1.590022   -0.6796528   0.19573337  0.3285472   0.25993788\n",
      "  0.          0.        ]\n",
      "episode 16-step 26, taking action 3, observation [-0.16547327  1.5939307  -0.66910857  0.17135829  0.33926558  0.21436748\n",
      "  0.          0.        ]\n",
      "episode 16-step 27, taking action 0, observation [-0.17206211  1.5972409  -0.66910386  0.14468254  0.34998387  0.21436569\n",
      "  0.          0.        ]\n",
      "episode 16-step 28, taking action 3, observation [-0.17858009  1.5999899  -0.66012055  0.12012798  0.35873827  0.17508802\n",
      "  0.          0.        ]\n",
      "episode 16-step 29, taking action 1, observation [-0.18516436  1.6021075  -0.66841763  0.09157617  0.36929828  0.21119992\n",
      "  0.          0.        ]\n",
      "episode 16-step 30, taking action 0, observation [-0.1917489   1.6036263  -0.6684127   0.06490079  0.3798582   0.21119829\n",
      "  0.          0.        ]\n",
      "episode 16-step 31, taking action 3, observation [-0.19824143  1.6045923  -0.65678984  0.04090519  0.38788214  0.16047832\n",
      "  0.          0.        ]\n",
      "episode 16-step 32, taking action 2, observation [-0.20500474  1.606337   -0.68425983  0.07535587  0.3963539   0.16943505\n",
      "  0.          0.        ]\n",
      "episode 16-step 33, taking action 3, observation [-0.21170397  1.6075282  -0.67607677  0.0512136   0.40294534  0.13182859\n",
      "  0.          0.        ]\n",
      "episode 16-step 34, taking action 0, observation [-0.2184033   1.60812    -0.6760746   0.02454359  0.40953675  0.13182819\n",
      "  0.          0.        ]\n",
      "episode 16-step 35, taking action 1, observation [-0.2251626   1.608084   -0.6835576  -0.00382964  0.41775912  0.16444719\n",
      "  0.          0.        ]\n",
      "episode 16-step 36, taking action 0, observation [-0.23192215  1.607449   -0.6835542  -0.03050148  0.42598143  0.1644464\n",
      "  0.          0.        ]\n",
      "episode 16-step 37, taking action 2, observation [-0.23872295  1.6069038  -0.68829983 -0.02674438  0.4349088   0.17854781\n",
      "  0.          0.        ]\n",
      "episode 16-step 38, taking action 1, observation [-0.24558726  1.6057105  -0.6963605  -0.05614773  0.44573572  0.21653838\n",
      "  0.          0.        ]\n",
      "episode 16-step 39, taking action 2, observation [-0.25300828  1.6050951  -0.75127906 -0.03030476  0.45580482  0.2013812\n",
      "  0.          0.        ]\n",
      "episode 16-step 40, taking action 1, observation [-0.26050678  1.6038244  -0.7610684  -0.06018981  0.4681691   0.24728458\n",
      "  0.          0.        ]\n",
      "episode 16-step 41, taking action 1, observation [-0.26808238  1.6019013  -0.7707342  -0.08998106  0.48279276  0.29247338\n",
      "  0.          0.        ]\n",
      "episode 16-step 42, taking action 3, observation [-0.27557254  1.5994469  -0.75976735 -0.11289359  0.494812    0.24038482\n",
      "  0.          0.        ]\n",
      "episode 16-step 43, taking action 0, observation [-0.28306326  1.5963941  -0.7597588  -0.13957092  0.50683105  0.24038246\n",
      "  0.          0.        ]\n",
      "episode 16-step 44, taking action 3, observation [-0.2904802   1.592803   -0.7502898  -0.16281006  0.5165633   0.1946444\n",
      "  0.          0.        ]\n",
      "episode 16-step 45, taking action 0, observation [-0.29789752  1.5886132  -0.7502839  -0.18948363  0.5262954   0.19464314\n",
      "  0.          0.        ]\n",
      "episode 16-step 46, taking action 1, observation [-0.3053975   1.5837522  -0.7607651  -0.22026631  0.5386233   0.2465572\n",
      "  0.          0.        ]\n",
      "episode 16-step 47, taking action 2, observation [-0.31348982  1.5795419  -0.81959295 -0.19129992  0.5505768   0.23906915\n",
      "  0.          0.        ]\n",
      "episode 16-step 48, taking action 2, observation [-0.3219161   1.575165   -0.85238266 -0.1985609   0.5619127   0.22671692\n",
      "  0.          0.        ]\n",
      "episode 16-step 49, taking action 1, observation [-0.3303947   1.5701308  -0.8590727  -0.22851346  0.57504356  0.26261652\n",
      "  0.          0.        ]\n",
      "episode 16-step 50, taking action 3, observation [-0.3388061  1.564577  -0.8502547 -0.250812   0.585794   0.2150083\n",
      "  0.         0.       ]\n",
      "episode 16-step 51, taking action 0, observation [-0.34721813  1.5584244  -0.8502467  -0.27748674  0.5965443   0.21500655\n",
      "  0.          0.        ]\n",
      "episode 16-step 52, taking action 1, observation [-0.35570583  1.5515978  -0.85983056 -0.3084757   0.6097992   0.2650972\n",
      "  0.          0.        ]\n",
      "episode 16-step 53, taking action 3, observation [-0.36411768  1.5442387  -0.8501542  -0.331287    0.6206313   0.21664134\n",
      "  0.          0.        ]\n",
      "episode 16-step 54, taking action 0, observation [-0.37253013  1.5362808  -0.85014564 -0.35796168  0.6314632   0.21663955\n",
      "  0.          0.        ]\n",
      "episode 16-step 55, taking action 3, observation [-0.38086993  1.5277979  -0.840804   -0.38037074  0.63983554  0.16744664\n",
      "  0.          0.        ]\n",
      "episode 16-step 56, taking action 3, observation [-0.38914257  1.5187968  -0.8320826  -0.40246436  0.645775    0.11879008\n",
      "  0.          0.        ]\n",
      "episode 16-step 57, taking action 0, observation [-0.39741534  1.5091958  -0.83208    -0.42913342  0.6517145   0.11878977\n",
      "  0.          0.        ]\n",
      "episode 16-step 58, taking action 3, observation [-0.40561587  1.4990754  -0.82278615 -0.45118472  0.65511686  0.0680467\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 16-step 59, taking action 0, observation [-0.41381645  1.4883554  -0.82278526 -0.47785217  0.6585192   0.06804662\n",
      "  0.          0.        ]\n",
      "episode 16-step 60, taking action 1, observation [-0.42207044  1.4769785  -0.82959396 -0.5078096   0.6637603   0.10482242\n",
      "  0.          0.        ]\n",
      "episode 16-step 61, taking action 1, observation [-0.430376    1.4649417  -0.83620137 -0.53793156  0.67084765  0.14174661\n",
      "  0.          0.        ]\n",
      "episode 16-step 62, taking action 2, observation [-0.43913707  1.4527982  -0.88116324 -0.5424001   0.6772298   0.12764323\n",
      "  0.          0.        ]\n",
      "episode 16-step 63, taking action 1, observation [-0.4479505   1.4399953  -0.8878335  -0.5725227   0.6854706   0.16481695\n",
      "  0.          0.        ]\n",
      "episode 16-step 64, taking action 1, observation [-0.45681754  1.4265354  -0.8945862  -0.60255486  0.69556236  0.20183618\n",
      "  0.          0.        ]\n",
      "episode 16-step 65, taking action 0, observation [-0.4656852   1.4124764  -0.8945781  -0.62922806  0.7056541   0.20183477\n",
      "  0.          0.        ]\n",
      "episode 16-step 66, taking action 2, observation [-0.47490454  1.3986905  -0.930075   -0.6173765   0.7162524   0.21196544\n",
      "  0.          0.        ]\n",
      "episode 16-step 67, taking action 0, observation [-0.4841245   1.3843057  -0.93006575 -0.6440503   0.72685057  0.21196377\n",
      "  0.          0.        ]\n",
      "episode 16-step 68, taking action 1, observation [-0.49339956  1.3692565  -0.93698376 -0.6745368   0.73944205  0.2518295\n",
      "  0.          0.        ]\n",
      "episode 16-step 69, taking action 2, observation [-0.50296164  1.3542733  -0.96588135 -0.6718715   0.7524357   0.25987223\n",
      "  0.          0.        ]\n",
      "episode 16-step 70, taking action 1, observation [-0.5125806   1.338603   -0.97318697 -0.7035998   0.7678034   0.3073535\n",
      "  0.          0.        ]\n",
      "episode 16-step 71, taking action 0, observation [-0.52220094  1.322335   -0.9731666  -0.7302807   0.7831708   0.30734852\n",
      "  0.          0.        ]\n",
      "episode 16-step 72, taking action 3, observation [-0.5317815   1.3055458  -0.9676296  -0.7526299   0.7966006   0.26859564\n",
      "  0.          0.        ]\n",
      "episode 16-step 73, taking action 3, observation [-0.54131263  1.2882318  -0.9611229  -0.775027    0.80796593  0.22730677\n",
      "  0.          0.        ]\n",
      "episode 16-step 74, taking action 3, observation [-0.55080026  1.2704021  -0.95522165 -0.7969691   0.817224    0.18516223\n",
      "  0.          0.        ]\n",
      "episode 16-step 75, taking action 0, observation [-0.56028855  1.2519732  -0.9552137  -0.8236407   0.82648206  0.18516111\n",
      "  0.          0.        ]\n",
      "episode 16-step 76, taking action 2, observation [-0.57043266  1.2340057  -1.0209796  -0.803341    0.8360918   0.19219533\n",
      "  0.          0.        ]\n",
      "episode 16-step 77, taking action 2, observation [-0.5810059   1.2163677  -1.0643431  -0.789141    0.84649765  0.20811634\n",
      "  0.          0.        ]\n",
      "episode 16-step 78, taking action 0, observation [-0.5915797   1.1981308  -1.0643331  -0.8158137   0.8569034   0.20811482\n",
      "  0.          0.        ]\n",
      "episode 16-step 79, taking action 2, observation [-0.60254616  1.1799207  -1.1036932  -0.81480986  0.8675803   0.21353722\n",
      "  0.          0.        ]\n",
      "episode 16-step 80, taking action 1, observation [-0.61356884  1.1610168  -1.1108487  -0.84700114  0.8807745   0.26388234\n",
      "  0.          0.        ]\n",
      "episode 16-step 81, taking action 1, observation [-0.62463164  1.1414416  -1.1159241  -0.87790185  0.8958479   0.30146837\n",
      "  0.          0.        ]\n",
      "episode 16-step 82, taking action 1, observation [-0.6357505   1.1211604  -1.1230465  -0.91084033  0.91366065  0.35625434\n",
      "  0.          0.        ]\n",
      "episode 16-step 83, taking action 3, observation [-0.6468342   1.1003523  -1.1181684  -0.9333967   0.9296465   0.31971666\n",
      "  0.          0.        ]\n",
      "episode 16-step 84, taking action 1, observation [-0.6579634  1.078851  -1.1239142 -0.9656109  0.9479982  0.367035\n",
      "  0.         0.       ]\n",
      "episode 16-step 85, taking action 2, observation [-0.66988754  1.0575936  -1.2031205  -0.9548828   0.96631753  0.3663875\n",
      "  0.          0.        ]\n",
      "episode 16-step 86, taking action 2, observation [-0.6824215   1.0365216  -1.2640322  -0.94699264  0.9850256   0.374161\n",
      "  0.          0.        ]\n",
      "episode 16-step 87, taking action 0, observation [-0.694958    1.014852   -1.2639959  -0.97367567  1.0037332   0.37415197\n",
      "  0.          0.        ]\n",
      "episode 16-step 88, taking action 2, observation [-0.70828074  0.99327767 -1.342241   -0.9694913   1.022294    0.37121883\n",
      "  0.          0.        ]\n",
      "episode 16-step 89, taking action 1, observation [-0.721638   0.9710086 -1.3465644 -1.0018116  1.0431265  0.4166491\n",
      "  0.         0.       ]\n",
      "episode 16-step 90, taking action 2, observation [-0.73588014  0.94879246 -1.4345067  -0.99941283  1.0636365   0.4102018\n",
      "  0.          0.        ]\n",
      "episode 16-step 91, taking action 3, observation [-0.7500902   0.92608327 -1.4297477  -1.019992    1.081673    0.36072877\n",
      "  0.          0.        ]\n",
      "episode 16-step 92, taking action 2, observation [-0.7647349   0.90307933 -1.4728742  -1.0331322   1.0995945   0.35842916\n",
      "  0.          0.        ]\n",
      "episode 16-step 93, taking action 0, observation [-0.7793822   0.8794773  -1.4728384  -1.0598112   1.1175156   0.35842127\n",
      "  0.          0.        ]\n",
      "episode 16-step 94, taking action 1, observation [-0.79407215  0.85516363 -1.4779834  -1.0932019   1.1381681   0.41305107\n",
      "  0.          0.        ]\n",
      "episode 16-step 95, taking action 3, observation [-0.80873823  0.8303617  -1.4741818  -1.113479    1.1563098   0.36283478\n",
      "  0.          0.        ]\n",
      "episode 16-step 96, taking action 1, observation [-0.82344323  0.80483264 -1.4789398  -1.1477547   1.1774629   0.4230618\n",
      "  0.          0.        ]\n",
      "episode 16-step 97, taking action 3, observation [-0.8381285   0.7787965  -1.4757712  -1.1690819   1.1965095   0.38093042\n",
      "  0.          0.        ]\n",
      "episode 16-step 98, taking action 2, observation [-0.8537851   0.752709   -1.5725367  -1.1714089   1.2154869   0.37954703\n",
      "  0.          0.        ]\n",
      "episode 16-step 99, taking action 2, observation [-0.8703358   0.72624606 -1.6613395  -1.1876819   1.2336731   0.3637251\n",
      "  0.          0.        ]\n",
      "episode 17-step 0, taking action 3, observation [ 0.00816097  1.4244024   0.4199892   0.28690594 -0.01167541 -0.1402159\n",
      "  0.          0.        ]\n",
      "episode 17-step 1, taking action 1, observation [ 0.01220932  1.4302571   0.409873    0.26016673 -0.01664906 -0.09948238\n",
      "  0.          0.        ]\n",
      "episode 17-step 2, taking action 2, observation [ 0.01636457  1.4364619   0.42008418  0.27571645 -0.0211624  -0.09027505\n",
      "  0.          0.        ]\n",
      "episode 17-step 3, taking action 1, observation [ 0.02044993  1.4420608   0.41131067  0.24879745 -0.02391536 -0.05506434\n",
      "  0.          0.        ]\n",
      "episode 17-step 4, taking action 3, observation [ 0.02462778  1.4470459   0.42293873  0.22146858 -0.02900397 -0.10178149\n",
      "  0.          0.        ]\n",
      "episode 17-step 5, taking action 2, observation [ 0.02896795  1.4521302   0.4384365   0.22587648 -0.03338181 -0.08756506\n",
      "  0.          0.        ]\n",
      "episode 17-step 6, taking action 0, observation [ 0.03330822  1.4566146   0.43844882  0.19920085 -0.03776003 -0.08757228\n",
      "  0.          0.        ]\n",
      "episode 17-step 7, taking action 1, observation [ 0.03757296  1.4605122   0.4289739   0.17315449 -0.04022748 -0.04935355\n",
      "  0.          0.        ]\n",
      "episode 17-step 8, taking action 0, observation [ 0.04183779  1.4638096   0.42897964  0.14648542 -0.04269538 -0.04936238\n",
      "  0.          0.        ]\n",
      "episode 17-step 9, taking action 3, observation [ 0.0461772   1.4664934   0.43834177  0.11915052 -0.04704699 -0.0870403\n",
      "  0.          0.        ]\n",
      "episode 17-step 10, taking action 3, observation [ 0.05060644  1.4685761   0.44959426  0.09233341 -0.05365031 -0.13207863\n",
      "  0.          0.        ]\n",
      "episode 17-step 11, taking action 1, observation [ 0.05497207  1.4700595   0.44160652  0.06574839 -0.0586476  -0.09995499\n",
      "  0.          0.        ]\n",
      "episode 17-step 12, taking action 1, observation [ 0.05925655  1.4709623   0.43140998  0.04000545 -0.06158166 -0.05868639\n",
      "  0.          0.        ]\n",
      "episode 17-step 13, taking action 0, observation [ 0.06354103  1.4712653   0.43141717  0.01333578 -0.06451581 -0.05868828\n",
      "  0.          0.        ]\n",
      "episode 17-step 14, taking action 2, observation [ 0.06769256  1.471773    0.4189036   0.02239898 -0.06823652 -0.07442114\n",
      "  0.          0.        ]\n",
      "episode 17-step 15, taking action 1, observation [ 0.07177095  1.471679    0.40974396 -0.00426468 -0.07012243 -0.03772151\n",
      "  0.          0.        ]\n",
      "episode 17-step 16, taking action 0, observation [ 0.07584934  1.470985    0.40974802 -0.03093073 -0.07200917 -0.03773821\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 17-step 17, taking action 2, observation [ 0.08010159  1.4712273   0.4266584   0.01069694 -0.07342819 -0.02838344\n",
      "  0.          0.        ]\n",
      "episode 17-step 18, taking action 3, observation [ 0.08445168  1.4708575   0.4389301  -0.01663117 -0.07731695 -0.07778241\n",
      "  0.          0.        ]\n",
      "episode 17-step 19, taking action 0, observation [ 0.08880186  1.469888    0.43894276 -0.04329579 -0.08120396 -0.07774699\n",
      "  0.          0.        ]\n",
      "episode 17-step 20, taking action 0, observation [ 0.09315233  1.4683186   0.4389531  -0.06996616 -0.08509091 -0.07774565\n",
      "  0.          0.        ]\n",
      "episode 17-step 21, taking action 2, observation [ 0.09737368  1.4669652   0.42686248 -0.06043158 -0.08979056 -0.0940014\n",
      "  0.          0.        ]\n",
      "episode 17-step 22, taking action 3, observation [ 0.10167055  1.4650061   0.43633372 -0.08748168 -0.09638847 -0.13196988\n",
      "  0.          0.        ]\n",
      "episode 17-step 23, taking action 3, observation [ 0.10604067  1.462429    0.44552523 -0.11510205 -0.1048485  -0.1692158\n",
      "  0.          0.        ]\n",
      "episode 17-step 24, taking action 2, observation [ 0.11045675  1.4599805   0.45016617 -0.10944863 -0.11333101 -0.16965047\n",
      "  0.          0.        ]\n",
      "episode 17-step 25, taking action 0, observation [ 0.11487303  1.4569329   0.45016518 -0.13612129 -0.12181348 -0.16964963\n",
      "  0.          0.        ]\n",
      "episode 17-step 26, taking action 2, observation [ 0.11947546  1.4538107   0.46811146 -0.13942887 -0.12963766 -0.15648368\n",
      "  0.          0.        ]\n",
      "episode 17-step 27, taking action 2, observation [ 0.12426214  1.4507083   0.48591247 -0.13852872 -0.13686417 -0.14453039\n",
      "  0.          0.        ]\n",
      "episode 17-step 28, taking action 2, observation [ 0.12911396  1.4485458   0.49278635 -0.0968327  -0.14443836 -0.1514837\n",
      "  0.          0.        ]\n",
      "episode 17-step 29, taking action 3, observation [ 0.13406143  1.4457645   0.5047656  -0.12461196 -0.15444702 -0.20017318\n",
      "  0.          0.        ]\n",
      "episode 17-step 30, taking action 1, observation [ 0.13892059  1.4424155   0.49364218 -0.14966932 -0.1621607  -0.15427336\n",
      "  0.          0.        ]\n",
      "episode 17-step 31, taking action 0, observation [ 0.14377995  1.4384673   0.493641   -0.17634088 -0.16987433 -0.1542727\n",
      "  0.          0.        ]\n",
      "episode 17-step 32, taking action 1, observation [ 0.14854737  1.433949    0.48205796 -0.2014288  -0.17520061 -0.10652594\n",
      "  0.          0.        ]\n",
      "episode 17-step 33, taking action 1, observation [ 0.15324602  1.42885     0.4733997  -0.22703692 -0.17874974 -0.0709824\n",
      "  0.          0.        ]\n",
      "episode 17-step 34, taking action 1, observation [ 0.15787439  1.4231772   0.4645585  -0.25233334 -0.18046431 -0.03429145\n",
      "  0.          0.        ]\n",
      "episode 17-step 35, taking action 0, observation [ 0.16250286  1.4169044   0.46455842 -0.27900022 -0.18217888 -0.03429164\n",
      "  0.          0.        ]\n",
      "episode 17-step 36, taking action 1, observation [ 0.16706781  1.4100404   0.45660204 -0.30508128 -0.18228652 -0.00215262\n",
      "  0.          0.        ]\n",
      "episode 17-step 37, taking action 2, observation [ 0.17171498  1.4033618   0.46489018 -0.29684836 -0.18245873 -0.00344414\n",
      "  0.          0.        ]\n",
      "episode 17-step 38, taking action 1, observation [ 0.17626496  1.3961065   0.45267645 -0.32217935 -0.18013403  0.04649416\n",
      "  0.          0.        ]\n",
      "episode 17-step 39, taking action 0, observation [ 0.18081494  1.3882512   0.45267624 -0.34884647 -0.17780933  0.04649431\n",
      "  0.          0.        ]\n",
      "episode 17-step 40, taking action 2, observation [ 0.18555184  1.3812237   0.47129542 -0.31204572 -0.1754117   0.0479527\n",
      "  0.          0.        ]\n",
      "episode 17-step 41, taking action 2, observation [ 0.19037084  1.3748765   0.4798255  -0.2818463  -0.1733282   0.04166985\n",
      "  0.          0.        ]\n",
      "episode 17-step 42, taking action 1, observation [ 0.19512415  1.3679464   0.4715623  -0.307573   -0.16955343  0.07549578\n",
      "  0.          0.        ]\n",
      "episode 17-step 43, taking action 2, observation [ 0.19999886  1.3609551   0.48342854 -0.31026062 -0.16550523  0.08096372\n",
      "  0.          0.        ]\n",
      "episode 17-step 44, taking action 2, observation [ 0.20489569  1.3540082   0.48586813 -0.308337   -0.16168286  0.07644743\n",
      "  0.          0.        ]\n",
      "episode 17-step 45, taking action 0, observation [ 0.20979261  1.3464613   0.48586783 -0.33500487 -0.1578605   0.07644734\n",
      "  0.          0.        ]\n",
      "episode 17-step 46, taking action 3, observation [ 0.21475725  1.338298    0.49437824 -0.3625944  -0.15577608  0.04168808\n",
      "  0.          0.        ]\n",
      "episode 17-step 47, taking action 3, observation [ 0.21978875  1.3295082   0.50281155 -0.39062113 -0.15544002  0.00672169\n",
      "  0.          0.        ]\n",
      "episode 17-step 48, taking action 0, observation [ 0.22482033  1.3201184   0.5028116  -0.4172878  -0.15510394  0.00672151\n",
      "  0.          0.        ]\n",
      "episode 17-step 49, taking action 2, observation [ 2.2993965e-01  1.3116460e+00  5.1192605e-01 -3.7655362e-01\n",
      " -1.5508804e-01  3.1805778e-04  0.0000000e+00  0.0000000e+00]\n",
      "episode 17-step 50, taking action 3, observation [ 0.23514137  1.3025588   0.52224725 -0.404092   -0.15716515 -0.04154194\n",
      "  0.          0.        ]\n",
      "episode 17-step 51, taking action 3, observation [ 0.24042587  1.292851    0.53265774 -0.431914   -0.16136974 -0.0840918\n",
      "  0.          0.        ]\n",
      "episode 17-step 52, taking action 3, observation [ 0.2457798   1.2825387   0.54132587 -0.45898083 -0.1673118  -0.11884135\n",
      "  0.          0.        ]\n",
      "episode 17-step 53, taking action 2, observation [ 0.2511607   1.2725023   0.54435515 -0.44677576 -0.17357968 -0.12535784\n",
      "  0.          0.        ]\n",
      "episode 17-step 54, taking action 1, observation [ 0.2564476   1.261889    0.53252304 -0.47216088 -0.17742994 -0.07700519\n",
      "  0.          0.        ]\n",
      "episode 17-step 55, taking action 3, observation [ 0.2617956   1.2506577   0.5402181  -0.4998276  -0.18286377 -0.10867663\n",
      "  0.          0.        ]\n",
      "episode 17-step 56, taking action 2, observation [ 0.26725465  1.2395431   0.5512177  -0.494645   -0.18819386 -0.10660209\n",
      "  0.          0.        ]\n",
      "episode 17-step 57, taking action 1, observation [ 0.27264374  1.2278552   0.54238653 -0.5199069  -0.19168715 -0.0698663\n",
      "  0.          0.        ]\n",
      "episode 17-step 58, taking action 1, observation [ 0.2779394   1.2155938   0.5306337  -0.54508823 -0.19276296 -0.02151641\n",
      "  0.          0.        ]\n",
      "episode 17-step 59, taking action 0, observation [ 0.28323507  1.2027326   0.5306337  -0.571755   -0.19383878 -0.02151628\n",
      "  0.          0.        ]\n",
      "episode 17-step 60, taking action 0, observation [ 0.28853065  1.1892712   0.5306337  -0.5984217  -0.1949146  -0.02151602\n",
      "  0.          0.        ]\n",
      "episode 17-step 61, taking action 2, observation [ 0.29407158  1.1765542   0.5548498  -0.5652971  -0.19567968 -0.01530181\n",
      "  0.          0.        ]\n",
      "episode 17-step 62, taking action 1, observation [ 0.29954776  1.1632478   0.54674834 -0.5912802  -0.19480321  0.01752932\n",
      "  0.          0.        ]\n",
      "episode 17-step 63, taking action 3, observation [ 0.30508557  1.1493292   0.5544622  -0.6186971  -0.19549713 -0.01387836\n",
      "  0.          0.        ]\n",
      "episode 17-step 64, taking action 0, observation [ 0.31062326  1.1348106   0.5544622  -0.64536375 -0.19619104 -0.01387839\n",
      "  0.          0.        ]\n",
      "episode 17-step 65, taking action 2, observation [ 0.31645623  1.1212007   0.58355653 -0.6049164  -0.19645162 -0.00521157\n",
      "  0.          0.        ]\n",
      "episode 17-step 66, taking action 2, observation [ 0.3225543   1.1082072   0.60959816 -0.57746494 -0.19624093  0.00421376\n",
      "  0.          0.        ]\n",
      "episode 17-step 67, taking action 2, observation [ 0.32887298  1.0957158   0.6313225  -0.5551021  -0.19569145  0.01098952\n",
      "  0.          0.        ]\n",
      "episode 17-step 68, taking action 0, observation [ 0.33519173  1.0826244   0.6313225  -0.58176875 -0.19514199  0.01098945\n",
      "  0.          0.        ]\n",
      "episode 17-step 69, taking action 1, observation [ 0.34141892  1.0689561   0.6198433  -0.6071023  -0.19223967  0.05804621\n",
      "  0.          0.        ]\n",
      "episode 17-step 70, taking action 3, observation [ 0.34774     1.0546588   0.63163346 -0.63537616 -0.19177109  0.00937177\n",
      "  0.          0.        ]\n",
      "episode 17-step 71, taking action 3, observation [ 0.35414094  1.0397296   0.6417244  -0.66372764 -0.19340783 -0.03273456\n",
      "  0.          0.        ]\n",
      "episode 17-step 72, taking action 3, observation [ 0.36060923  1.0241864   0.65017176 -0.6912558  -0.19676721 -0.06718771\n",
      "  0.          0.        ]\n",
      "episode 17-step 73, taking action 1, observation [ 0.3670044   1.0080678   0.640949   -0.71657234 -0.19821544 -0.02896469\n",
      "  0.          0.        ]\n",
      "episode 17-step 74, taking action 1, observation [ 0.3733117   0.99138206  0.6298934  -0.7414771  -0.19736177  0.01707308\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 17-step 75, taking action 1, observation [ 0.37953082  0.9741111   0.61882627 -0.7671915  -0.19426444  0.06194686\n",
      "  0.          0.        ]\n",
      "episode 17-step 76, taking action 0, observation [ 0.3857499   0.9562403   0.61882603 -0.79385895 -0.1911671   0.06194685\n",
      "  0.          0.        ]\n",
      "episode 17-step 77, taking action 1, observation [ 0.39187852  0.937801    0.60742414 -0.81883013 -0.18570612  0.10921942\n",
      "  0.          0.        ]\n",
      "episode 17-step 78, taking action 1, observation [ 0.3979331   0.9187855   0.5980925  -0.8442282  -0.17831993  0.14772415\n",
      "  0.          0.        ]\n",
      "episode 17-step 79, taking action 0, observation [ 0.4039876   0.89917076  0.59809124 -0.8708993  -0.17093374  0.14772362\n",
      "  0.          0.        ]\n",
      "episode 17-step 80, taking action 3, observation [ 0.4101224   0.8789416   0.6081371  -0.8984697  -0.16558804  0.10691414\n",
      "  0.          0.        ]\n",
      "episode 17-step 81, taking action 2, observation [ 0.41653958  0.85938424  0.6357306  -0.8685619  -0.15959959  0.11976911\n",
      "  0.          0.        ]\n",
      "episode 17-step 82, taking action 3, observation [ 0.42303476  0.83920354  0.64554495 -0.8964981  -0.1556279   0.07943352\n",
      "  0.          0.        ]\n",
      "episode 17-step 83, taking action 0, observation [ 0.42952996  0.8184231   0.64554465 -0.92316604 -0.15165624  0.07943341\n",
      "  0.          0.        ]\n",
      "episode 17-step 84, taking action 1, observation [ 0.4359625   0.79705954  0.6376625  -0.9489343  -0.14607282  0.11166845\n",
      "  0.          0.        ]\n",
      "episode 17-step 85, taking action 0, observation [ 0.44239512  0.77509636  0.6376618  -0.97560364 -0.14048943  0.11166819\n",
      "  0.          0.        ]\n",
      "episode 17-step 86, taking action 2, observation [ 0.44907483  0.7538392   0.6618017  -0.9441927  -0.13433729  0.12304316\n",
      "  0.          0.        ]\n",
      "episode 17-step 87, taking action 3, observation [ 0.45581788  0.73197544  0.66972935 -0.9713166  -0.1297815   0.0911155\n",
      "  0.          0.        ]\n",
      "episode 17-step 88, taking action 2, observation [ 0.46268693  0.7108718   0.6822912  -0.93754333 -0.12518989  0.09183232\n",
      "  0.          0.        ]\n",
      "episode 17-step 89, taking action 2, observation [ 0.4697508   0.6905562   0.7014123  -0.90250593 -0.12023763  0.09904495\n",
      "  0.          0.        ]\n",
      "episode 17-step 90, taking action 0, observation [ 0.47681475  0.6696409   0.7014119  -0.9291746  -0.11528538  0.09904478\n",
      "  0.          0.        ]\n",
      "episode 17-step 91, taking action 0, observation [ 0.4838786   0.648126    0.7014116  -0.95584327 -0.11033315  0.09904462\n",
      "  0.          0.        ]\n",
      "episode 17-step 92, taking action 2, observation [ 0.490942    0.6266895   0.7015815  -0.9523906  -0.10561382  0.09438659\n",
      "  0.          0.        ]\n",
      "episode 17-step 93, taking action 1, observation [ 0.49791422  0.6046693   0.690139   -0.97819227 -0.09858125  0.14065105\n",
      "  0.          0.        ]\n",
      "episode 17-step 94, taking action 2, observation [ 0.50511515  0.5832254   0.712304   -0.95256966 -0.09085215  0.15458193\n",
      "  0.          0.        ]\n",
      "episode 17-step 95, taking action 2, observation [ 0.51235104  0.5627411   0.7160672  -0.9099753  -0.08339787  0.14908549\n",
      "  0.          0.        ]\n",
      "episode 17-step 96, taking action 1, observation [ 0.51952744  0.54167604  0.7085725  -0.9357406  -0.07441945  0.17956844\n",
      "  0.          0.        ]\n",
      "episode 17-step 97, taking action 1, observation [ 0.5266101   0.52003056  0.69679004 -0.9614906  -0.06306215  0.22714594\n",
      "  0.          0.        ]\n",
      "episode 17-step 98, taking action 0, observation [ 0.53369296  0.49778667  0.69678885 -0.98816794 -0.05170497  0.22714393\n",
      "  0.          0.        ]\n",
      "episode 17-step 99, taking action 2, observation [ 0.5408265   0.47562957  0.7017376  -0.9843977  -0.04022215  0.2296566\n",
      "  0.          0.        ]\n",
      "episode 18-step 0, taking action 3, observation [-0.01349401  1.4331874  -0.6758989   0.4819654   0.01336841  0.10987119\n",
      "  0.          0.        ]\n",
      "episode 18-step 1, taking action 0, observation [-0.02019768  1.4434342  -0.6759163   0.45535168  0.01885816  0.10980477\n",
      "  0.          0.        ]\n",
      "episode 18-step 2, taking action 1, observation [-0.02697248  1.4530814  -0.68484616  0.4286517   0.02613425  0.14553571\n",
      "  0.          0.        ]\n",
      "episode 18-step 3, taking action 3, observation [-0.03365736  1.4621282  -0.67353874  0.40198806  0.0311372   0.10006807\n",
      "  0.          0.        ]\n",
      "episode 18-step 4, taking action 3, observation [-0.04027281  1.4705863  -0.6648254   0.37583497  0.03438571  0.06497614\n",
      "  0.          0.        ]\n",
      "episode 18-step 5, taking action 2, observation [-0.0467906   1.4798658  -0.6556927   0.41233823  0.03825477  0.07738833\n",
      "  0.          0.        ]\n",
      "episode 18-step 6, taking action 2, observation [-0.05321226  1.4894772  -0.6466525   0.42704418  0.04269354  0.08878358\n",
      "  0.          0.        ]\n",
      "episode 18-step 7, taking action 0, observation [-0.05963411  1.4984885  -0.6466651   0.40037236  0.04713142  0.08876557\n",
      "  0.          0.        ]\n",
      "episode 18-step 8, taking action 1, observation [-0.06614838  1.50689    -0.6582626   0.37317201  0.0538982   0.13534817\n",
      "  0.          0.        ]\n",
      "episode 18-step 9, taking action 2, observation [-0.0728611   1.5160177  -0.67738307  0.40543875  0.05994941  0.12103502\n",
      "  0.          0.        ]\n",
      "episode 18-step 10, taking action 2, observation [-0.07947435  1.5260334  -0.6681889   0.4448593   0.06675243  0.13607274\n",
      "  0.          0.        ]\n",
      "episode 18-step 11, taking action 3, observation [-0.0860054   1.5354683  -0.65784067  0.419109    0.07145669  0.09409365\n",
      "  0.          0.        ]\n",
      "episode 18-step 12, taking action 0, observation [-0.09253645  1.5443037  -0.65785366  0.39244428  0.07616141  0.09410308\n",
      "  0.          0.        ]\n",
      "episode 18-step 13, taking action 1, observation [-0.09914102  1.5525267  -0.66706485  0.36511835  0.08272205  0.13122465\n",
      "  0.          0.        ]\n",
      "episode 18-step 14, taking action 2, observation [-0.10578012  1.5613515  -0.6706749   0.39182523  0.08943006  0.1341726\n",
      "  0.          0.        ]\n",
      "episode 18-step 15, taking action 0, observation [-0.11241941  1.5695766  -0.6706921   0.36514276  0.09613735  0.13415784\n",
      "  0.          0.        ]\n",
      "episode 18-step 16, taking action 2, observation [-0.11919336  1.5780722  -0.6837743   0.37715355  0.10247721  0.12680861\n",
      "  0.          0.        ]\n",
      "episode 18-step 17, taking action 2, observation [-0.12613955  1.58736    -0.70063317  0.41237074  0.10845891  0.1196445\n",
      "  0.          0.        ]\n",
      "episode 18-step 18, taking action 3, observation [-0.13301048  1.5960524  -0.6911979   0.38602245  0.11254397  0.0817088\n",
      "  0.          0.        ]\n",
      "episode 18-step 19, taking action 0, observation [-0.13988152  1.6041449  -0.6912088   0.35935715  0.11662903  0.08170886\n",
      "  0.          0.        ]\n",
      "episode 18-step 20, taking action 1, observation [-0.14683504  1.611629   -0.7015404   0.33211976  0.1227916   0.12326231\n",
      "  0.          0.        ]\n",
      "episode 18-step 21, taking action 1, observation [-0.15387802  1.6185063  -0.7127341   0.30494753  0.13119766  0.16813657\n",
      "  0.          0.        ]\n",
      "episode 18-step 22, taking action 3, observation [-0.16086121  1.6247947  -0.7052096   0.27886668  0.13807371  0.1375332\n",
      "  0.          0.        ]\n",
      "episode 18-step 23, taking action 2, observation [-0.16785526  1.6320293  -0.7069016   0.32083544  0.14555725  0.14968409\n",
      "  0.          0.        ]\n",
      "episode 18-step 24, taking action 3, observation [-0.17475224  1.638685   -0.6952705   0.29524946  0.15111375  0.11113234\n",
      "  0.          0.        ]\n",
      "episode 18-step 25, taking action 1, observation [-0.18174486  1.6447039  -0.70675063  0.26672238  0.15859449  0.1496149\n",
      "  0.          0.        ]\n",
      "episode 18-step 26, taking action 1, observation [-0.18881282  1.6501113  -0.7161838   0.2392993   0.16798554  0.1878214\n",
      "  0.          0.        ]\n",
      "episode 18-step 27, taking action 1, observation [-0.19595861  1.6549059  -0.72593033  0.21177116  0.17935516  0.22739264\n",
      "  0.          0.        ]\n",
      "episode 18-step 28, taking action 0, observation [-0.20310473  1.6591022  -0.72592753  0.18509379  0.19072469  0.2273906\n",
      "  0.          0.        ]\n",
      "episode 18-step 29, taking action 1, observation [-0.21034174  1.6626718  -0.73734915  0.15683499  0.20445383  0.2745828\n",
      "  0.          0.        ]\n",
      "episode 18-step 30, taking action 3, observation [-0.2174891   1.6656783  -0.7259898   0.13202748  0.21581233  0.22717023\n",
      "  0.          0.        ]\n",
      "episode 18-step 31, taking action 3, observation [-0.22457156  1.6681093  -0.71778965  0.10661943  0.22546458  0.19304529\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 18-step 32, taking action 1, observation [-0.2317326   1.6699055  -0.72768486  0.0780204   0.23721153  0.23493871\n",
      "  0.          0.        ]\n",
      "episode 18-step 33, taking action 0, observation [-0.23889394  1.6711036  -0.7276809   0.05134242  0.24895835  0.23493648\n",
      "  0.          0.        ]\n",
      "episode 18-step 34, taking action 1, observation [-0.2461286   1.6716844  -0.73683435  0.02348994  0.26259157  0.27266505\n",
      "  0.          0.        ]\n",
      "episode 18-step 35, taking action 1, observation [-0.2534454   1.6716272  -0.74715495 -0.0054      0.2784385   0.3169385\n",
      "  0.          0.        ]\n",
      "episode 18-step 36, taking action 2, observation [-0.26088828  1.6719587  -0.7599622   0.01166192  0.29456255  0.3224817\n",
      "  0.          0.        ]\n",
      "episode 18-step 37, taking action 3, observation [-0.26823908  1.6717294  -0.7482982  -0.01291941  0.30822197  0.27318865\n",
      "  0.          0.        ]\n",
      "episode 18-step 38, taking action 0, observation [-0.27559036  1.6709025  -0.7482913  -0.03960108  0.32188123  0.27318507\n",
      "  0.          0.        ]\n",
      "episode 18-step 39, taking action 2, observation [-0.2829228   1.6702251  -0.7471227  -0.03325818  0.33635393  0.2894538\n",
      "  0.          0.        ]\n",
      "episode 18-step 40, taking action 0, observation [-0.29025584  1.6689504  -0.7471142  -0.05994154  0.3508264   0.2894496\n",
      "  0.          0.        ]\n",
      "episode 18-step 41, taking action 2, observation [-0.29779667  1.6684419  -0.76833713 -0.02615423  0.36584392  0.3003501\n",
      "  0.          0.        ]\n",
      "episode 18-step 42, taking action 1, observation [-0.30539688  1.6672977  -0.77576464 -0.05496169  0.3825391   0.33390373\n",
      "  0.          0.        ]\n",
      "episode 18-step 43, taking action 2, observation [-0.3133337   1.6666609  -0.8091647  -0.03255103  0.39906427  0.33050346\n",
      "  0.          0.        ]\n",
      "episode 18-step 44, taking action 0, observation [-0.32127142  1.6654273  -0.80915165 -0.05923896  0.41558912  0.33049726\n",
      "  0.          0.        ]\n",
      "episode 18-step 45, taking action 3, observation [-0.32912564  1.6636558  -0.79841167 -0.08262111  0.42964417  0.2811015\n",
      "  0.          0.        ]\n",
      "episode 18-step 46, taking action 0, observation [-0.33698058  1.6612867  -0.7984015  -0.10930295  0.44369906  0.28109762\n",
      "  0.          0.        ]\n",
      "episode 18-step 47, taking action 2, observation [-0.34518304  1.6593466  -0.83298826 -0.09033256  0.4576643   0.27930516\n",
      "  0.          0.        ]\n",
      "episode 18-step 48, taking action 3, observation [-0.35332522  1.656854   -0.82522166 -0.11444159  0.46980664  0.24284658\n",
      "  0.          0.        ]\n",
      "episode 18-step 49, taking action 0, observation [-0.361468    1.6537633  -0.82521325 -0.14111938  0.48194882  0.24284413\n",
      "  0.          0.        ]\n",
      "episode 18-step 50, taking action 0, observation [-0.36961132  1.6500742  -0.82520473 -0.16779707  0.49409094  0.24284172\n",
      "  0.          0.        ]\n",
      "episode 18-step 51, taking action 3, observation [-0.3776733   1.645852   -0.8147708  -0.1907658   0.5037305   0.19279033\n",
      "  0.          0.        ]\n",
      "episode 18-step 52, taking action 0, observation [-0.38573566  1.6410307  -0.81476516 -0.21743934  0.5133699   0.19278911\n",
      "  0.          0.        ]\n",
      "episode 18-step 53, taking action 0, observation [-0.3937984   1.6356105  -0.81475943 -0.24411286  0.5230093   0.19278787\n",
      "  0.          0.        ]\n",
      "episode 18-step 54, taking action 1, observation [-0.401935    1.629519   -0.82419527 -0.27483666  0.5350512   0.2408371\n",
      "  0.          0.        ]\n",
      "episode 18-step 55, taking action 1, observation [-0.4101514   1.6227498  -0.83435553 -0.3059521   0.54969996  0.29297465\n",
      "  0.          0.        ]\n",
      "episode 18-step 56, taking action 1, observation [-0.41845113  1.6153097  -0.8448159  -0.336835    0.56696594  0.3453191\n",
      "  0.          0.        ]\n",
      "episode 18-step 57, taking action 0, observation [-0.42675227  1.6072729  -0.84479606 -0.36352295  0.58423156  0.34531206\n",
      "  0.          0.        ]\n",
      "episode 18-step 58, taking action 2, observation [-0.4353084   1.5992188  -0.87013847 -0.364462    0.6015041   0.34545097\n",
      "  0.          0.        ]\n",
      "episode 18-step 59, taking action 2, observation [-0.44401398  1.5910752  -0.88516986 -0.36873856  0.61909705  0.3518601\n",
      "  0.          0.        ]\n",
      "episode 18-step 60, taking action 0, observation [-0.45272118  1.5823351  -0.88514745 -0.39542663  0.6366896   0.3518526\n",
      "  0.          0.        ]\n",
      "episode 18-step 61, taking action 0, observation [-0.46142992  1.5729982  -0.8851245  -0.4221144   0.6542818   0.35184512\n",
      "  0.          0.        ]\n",
      "episode 18-step 62, taking action 2, observation [-0.47072944  1.5641158  -0.94385844 -0.40200123  0.671685    0.34806362\n",
      "  0.          0.        ]\n",
      "episode 18-step 63, taking action 0, observation [-0.4800307   1.5546365  -0.94383496 -0.42868805  0.6890878   0.34805632\n",
      "  0.          0.        ]\n",
      "episode 18-step 64, taking action 1, observation [-0.48939294  1.5444777  -0.95153016 -0.4600604   0.7088089   0.39442128\n",
      "  0.          0.        ]\n",
      "episode 18-step 65, taking action 3, observation [-0.49870834  1.5337738  -0.94531506 -0.4837388   0.7268491   0.36080313\n",
      "  0.          0.        ]\n",
      "episode 18-step 66, taking action 2, observation [-0.50837326  1.5233481  -0.9804983  -0.47180197  0.7454802   0.37262174\n",
      "  0.          0.        ]\n",
      "episode 18-step 67, taking action 0, observation [-0.5180403   1.5123259  -0.98046887 -0.49849036  0.7641108   0.37261286\n",
      "  0.          0.        ]\n",
      "episode 18-step 68, taking action 1, observation [-0.52777576  1.5006129  -0.98896664 -0.5306244   0.7853968   0.42572063\n",
      "  0.          0.        ]\n",
      "episode 18-step 69, taking action 3, observation [-0.5374472   1.488403   -0.9803171  -0.55158556  0.80393237  0.3707115\n",
      "  0.          0.        ]\n",
      "episode 18-step 70, taking action 2, observation [-0.54760724  1.4761009  -1.0286071  -0.5556307   0.8220278   0.36190885\n",
      "  0.          0.        ]\n",
      "episode 18-step 71, taking action 2, observation [-0.5583901   1.4644401  -1.0912278  -0.5277044   0.84096503  0.37874404\n",
      "  0.          0.        ]\n",
      "episode 18-step 72, taking action 0, observation [-0.56917536  1.4521824  -1.0911944  -0.55439156  0.8599018   0.37873465\n",
      "  0.          0.        ]\n",
      "episode 18-step 73, taking action 0, observation [-0.579963    1.4393277  -1.0911605  -0.5810782   0.87883806  0.3787253\n",
      "  0.          0.        ]\n",
      "episode 18-step 74, taking action 2, observation [-0.5915908   1.4267642  -1.1746624  -0.56810606  0.89741087  0.3714557\n",
      "  0.          0.        ]\n",
      "episode 18-step 75, taking action 3, observation [-0.60316485  1.4137007  -1.1674142  -0.5890974   0.9134021   0.31982416\n",
      "  0.          0.        ]\n",
      "episode 18-step 76, taking action 3, observation [-0.61469597  1.400142   -1.1613963  -0.6098565   0.92688984  0.2697553\n",
      "  0.          0.        ]\n",
      "episode 18-step 77, taking action 0, observation [-0.62622833  1.3859847  -1.1613778  -0.6365324   0.9403774   0.2697519\n",
      "  0.          0.        ]\n",
      "episode 18-step 78, taking action 2, observation [-0.63852125  1.3718339  -1.2369446  -0.6360093   0.95330274  0.25850677\n",
      "  0.          0.        ]\n",
      "episode 18-step 79, taking action 3, observation [-0.65077066  1.357185   -1.231024   -0.65682304  0.9637393   0.20873013\n",
      "  0.          0.        ]\n",
      "episode 18-step 80, taking action 2, observation [-0.66373837  1.3425891  -1.302624   -0.654371    0.97393507  0.20391631\n",
      "  0.          0.        ]\n",
      "episode 18-step 81, taking action 3, observation [-0.6766503   1.3275048  -1.2953486  -0.674536    0.981282    0.14693697\n",
      "  0.          0.        ]\n",
      "episode 18-step 82, taking action 2, observation [-0.69007796  1.3121544  -1.3466269  -0.68610805  0.9881593   0.13754593\n",
      "  0.          0.        ]\n",
      "episode 18-step 83, taking action 1, observation [-0.7035529   1.2961062  -1.3526804  -0.7185438   0.9975217   0.18724813\n",
      "  0.          0.        ]\n",
      "episode 18-step 84, taking action 0, observation [-0.7170285   1.2794586  -1.3526711  -0.7452145   1.0068841   0.18724696\n",
      "  0.          0.        ]\n",
      "episode 18-step 85, taking action 2, observation [-0.7309245   1.262413   -1.3943627  -0.7626383   1.0157343   0.17700265\n",
      "  0.          0.        ]\n",
      "episode 18-step 86, taking action 0, observation [-0.7448211  1.2447681 -1.3943542 -0.7893084  1.0245844  0.1770022\n",
      "  0.         0.       ]\n",
      "episode 18-step 87, taking action 3, observation [-0.75868225  1.2265913  -1.389831   -0.81195813  1.0316684   0.14168121\n",
      "  0.          0.        ]\n",
      "episode 18-step 88, taking action 1, observation [-0.77258766  1.2077098  -1.3955301  -0.84479964  1.0413383   0.19339819\n",
      "  0.          0.        ]\n",
      "episode 18-step 89, taking action 1, observation [-0.786545    1.1881125  -1.4021044  -0.87834847  1.0539147   0.2515263\n",
      "  0.          0.        ]\n",
      "episode 18-step 90, taking action 2, observation [-0.80089885  1.1684198  -1.4418439  -0.8828998   1.0669372   0.26045182\n",
      "  0.          0.        ]\n",
      "episode 18-step 91, taking action 1, observation [-0.8152942   1.1480039  -1.447234   -0.9168416   1.0828854   0.31896347\n",
      "  0.          0.        ]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 18-step 92, taking action 0, observation [-0.8296915   1.1269896  -1.4472058  -0.9435185   1.0988333   0.31895787\n",
      "  0.          0.        ]\n",
      "episode 18-step 93, taking action 0, observation [-0.8440909   1.1053768  -1.4471773  -0.97019506  1.1147809   0.3189523\n",
      "  0.          0.        ]\n",
      "episode 18-step 94, taking action 2, observation [-0.859067    1.0838492  -1.5049331  -0.9669227   1.1314503   0.33338895\n",
      "  0.          0.        ]\n",
      "episode 18-step 95, taking action 0, observation [-0.8740453   1.0617232  -1.5049013  -0.9935995   1.1481194   0.33338258\n",
      "  0.          0.        ]\n",
      "episode 18-step 96, taking action 2, observation [-0.8898134   1.039648   -1.5837548  -0.9916191   1.1651205   0.34002098\n",
      "  0.          0.        ]\n",
      "episode 18-step 97, taking action 1, observation [-0.9056252   1.0168577  -1.5889384  -1.0252483   1.1849443   0.39647475\n",
      "  0.          0.        ]\n",
      "episode 18-step 98, taking action 2, observation [-0.9221804   0.99407375 -1.6630716  -1.0253484   1.2052052   0.40521893\n",
      "  0.          0.        ]\n",
      "episode 18-step 99, taking action 0, observation [-0.938739    0.9706916  -1.6630234  -1.0520278   1.2254655   0.40520746\n",
      "  0.          0.        ]\n",
      "episode 19-step 0, taking action 1, observation [ 0.01280375  1.414226    0.64204156  0.06069634 -0.01290463 -0.10902502\n",
      "  0.          0.        ]\n",
      "episode 19-step 1, taking action 1, observation [ 0.01908464  1.4149997   0.63144076  0.03435027 -0.01622157 -0.06634498\n",
      "  0.          0.        ]\n",
      "episode 19-step 2, taking action 3, observation [ 0.02545433  1.4151629   0.64258146  0.00718584 -0.02177383 -0.11105524\n",
      "  0.          0.        ]\n",
      "episode 19-step 3, taking action 1, observation [ 0.03175039  1.414723    0.6333364  -0.01960521 -0.02546788 -0.07388777\n",
      "  0.          0.        ]\n",
      "episode 19-step 4, taking action 1, observation [ 0.03798666  1.413693    0.62583524 -0.04581868 -0.02765275 -0.04370129\n",
      "  0.          0.        ]\n",
      "episode 19-step 5, taking action 2, observation [ 0.04408016  1.4129843   0.6123215  -0.03155194 -0.03059985 -0.05894735\n",
      "  0.          0.        ]\n",
      "episode 19-step 6, taking action 1, observation [ 0.05009975  1.4116802   0.6030539  -0.05798582 -0.03168383 -0.02168152\n",
      "  0.          0.        ]\n",
      "episode 19-step 7, taking action 0, observation [ 0.05611935  1.4097762   0.6030561  -0.08465065 -0.03276876 -0.02170075\n",
      "  0.          0.        ]\n",
      "episode 19-step 8, taking action 1, observation [ 0.06207371  1.4072833   0.5948599  -0.11078719 -0.03220352  0.01130624\n",
      "  0.          0.        ]\n",
      "episode 19-step 9, taking action 3, observation [ 0.06810637  1.4041812   0.604692   -0.13790204 -0.03361491 -0.02823033\n",
      "  0.          0.        ]\n",
      "episode 19-step 10, taking action 3, observation [ 0.07421684  1.400476    0.61444885 -0.1647511  -0.03698084 -0.06732471\n",
      "  0.          0.        ]\n",
      "episode 19-step 11, taking action 0, observation [ 0.08032751  1.3961711   0.61445963 -0.19142076 -0.04034585 -0.06730612\n",
      "  0.          0.        ]\n",
      "episode 19-step 12, taking action 1, observation [ 0.08637772  1.3912652   0.6068915  -0.21809006 -0.04219314 -0.03694924\n",
      "  0.          0.        ]\n",
      "episode 19-step 13, taking action 1, observation [ 0.09235831  1.3857752   0.5981433  -0.24400268 -0.04227586 -0.00165458\n",
      "  0.          0.        ]\n",
      "episode 19-step 14, taking action 3, observation [ 0.09840832  1.3796699   0.6068633  -0.27139953 -0.04411675 -0.03682128\n",
      "  0.          0.        ]\n",
      "episode 19-step 15, taking action 1, observation [ 0.10439263  1.3729584   0.5986198  -0.29829457 -0.04430857 -0.00383655\n",
      "  0.          0.        ]\n",
      "episode 19-step 16, taking action 1, observation [ 0.11030712  1.3656495   0.58986765 -0.32479334 -0.04274639  0.03124637\n",
      "  0.          0.        ]\n",
      "episode 19-step 17, taking action 2, observation [ 0.11609926  1.3582941   0.57828766 -0.3268809  -0.04183006  0.01832841\n",
      "  0.          0.        ]\n",
      "episode 19-step 18, taking action 0, observation [ 0.1218914   1.3503386   0.578285   -0.35355404 -0.0409129   0.01834471\n",
      "  0.          0.        ]\n",
      "episode 19-step 19, taking action 0, observation [ 0.12768345  1.341783    0.57828224 -0.38022137 -0.03999618  0.0183362\n",
      "  0.          0.        ]\n",
      "episode 19-step 20, taking action 0, observation [ 0.13347559  1.3326274   0.5782794  -0.4068895  -0.03907958  0.01833344\n",
      "  0.          0.        ]\n",
      "episode 19-step 21, taking action 1, observation [ 0.13920307  1.322871    0.57018155 -0.43355647 -0.03654256  0.05074518\n",
      "  0.          0.        ]\n",
      "episode 19-step 22, taking action 1, observation [ 0.1448677   1.3125167   0.56230694 -0.4600893  -0.0324301   0.08225651\n",
      "  0.          0.        ]\n",
      "episode 19-step 23, taking action 2, observation [ 0.15065165  1.3024473   0.57376724 -0.44743404 -0.02785319  0.09154657\n",
      "  0.          0.        ]\n",
      "episode 19-step 24, taking action 2, observation [ 0.15649967  1.2924759   0.5803157  -0.4430952  -0.02341098  0.0888508\n",
      "  0.          0.        ]\n",
      "episode 19-step 25, taking action 0, observation [ 0.16235408  1.281894    0.5805296  -0.47024596 -0.01856698  0.0968798\n",
      "  0.          0.        ]\n",
      "episode 19-step 26, taking action 2, observation [ 0.16828594  1.2718661   0.58798635 -0.44562688 -0.01343099  0.1027201\n",
      "  0.          0.        ]\n",
      "episode 19-step 27, taking action 0, observation [ 0.1742179   1.2612385   0.58798635 -0.4722958  -0.00829499  0.10271974\n",
      "  0.          0.        ]\n",
      "episode 19-step 28, taking action 3, observation [ 0.18023309  1.2500104   0.59844035 -0.49901304 -0.00525264  0.06084727\n",
      "  0.          0.        ]\n",
      "episode 19-step 29, taking action 1, observation [ 1.8615408e-01  1.2381905e+00  5.8661389e-01 -5.2532321e-01\n",
      "  1.5878388e-04  1.0822822e-01  0.0000000e+00  0.0000000e+00]\n",
      "episode 19-step 30, taking action 3, observation [ 0.19214296  1.2257745   0.5951416  -0.5518208   0.0038624   0.07407225\n",
      "  0.          0.        ]\n",
      "episode 19-step 31, taking action 2, observation [ 0.19829245  1.2141765   0.6104289  -0.51548827  0.00832471  0.08924603\n",
      "  0.          0.        ]\n",
      "episode 19-step 32, taking action 1, observation [ 0.2043542   1.2019802   0.5994212  -0.5421103   0.01499119  0.13332954\n",
      "  0.          0.        ]\n",
      "episode 19-step 33, taking action 2, observation [ 0.21043968  1.1902995   0.60162914 -0.5192241   0.02181847  0.13654572\n",
      "  0.          0.        ]\n",
      "episode 19-step 34, taking action 1, observation [ 0.21643381  1.1780071   0.59017485 -0.54649514  0.03094403  0.18251078\n",
      "  0.          0.        ]\n",
      "episode 19-step 35, taking action 3, observation [ 0.2224968   1.1651207   0.59881955 -0.5729013   0.03833609  0.14784123\n",
      "  0.          0.        ]\n",
      "episode 19-step 36, taking action 3, observation [ 0.22863674  1.1516417   0.60845876 -0.59922063  0.04379413  0.10916086\n",
      "  0.          0.        ]\n",
      "episode 19-step 37, taking action 2, observation [ 0.23465261  1.1389706   0.5964544  -0.56331486  0.04886953  0.10150827\n",
      "  0.          0.        ]\n",
      "episode 19-step 38, taking action 3, observation [ 0.24075317  1.1257093   0.6070818  -0.5894915   0.05181019  0.05881299\n",
      "  0.          0.        ]\n",
      "episode 19-step 39, taking action 3, observation [ 0.24693012  1.111855    0.6166655  -0.615783    0.05282682  0.02033261\n",
      "  0.          0.        ]\n",
      "episode 19-step 40, taking action 3, observation [ 0.25317413  1.0974145   0.6250822  -0.6417758   0.05214677 -0.01360099\n",
      "  0.          0.        ]\n",
      "episode 19-step 41, taking action 2, observation [ 0.25920802  1.0834718   0.60487455 -0.61962444  0.05067292 -0.02947681\n",
      "  0.          0.        ]\n",
      "episode 19-step 42, taking action 0, observation [ 0.2652418   1.0689291   0.6048745  -0.6462913   0.04919909 -0.02947672\n",
      "  0.          0.        ]\n",
      "episode 19-step 43, taking action 1, observation [ 0.27119407  1.0537821   0.5946313  -0.67322004  0.04977893  0.01159711\n",
      "  0.          0.        ]\n",
      "episode 19-step 44, taking action 3, observation [ 0.2772278   1.0380507   0.6048759  -0.699134    0.04829555 -0.02966773\n",
      "  0.          0.        ]\n",
      "episode 19-step 45, taking action 3, observation [ 0.2833396   1.0217257   0.6146759  -0.72544885  0.04484548 -0.06900122\n",
      "  0.          0.        ]\n",
      "episode 19-step 46, taking action 2, observation [ 0.2893116   1.0062319   0.6011417  -0.6884964   0.0409469  -0.07797179\n",
      "  0.          0.        ]\n",
      "episode 19-step 47, taking action 1, observation [ 0.2952238   0.9901387   0.5936519  -0.71519196  0.0385474  -0.04799001\n",
      "  0.          0.        ]\n",
      "episode 19-step 48, taking action 3, observation [ 0.301217    0.9734599   0.6038197  -0.74116606  0.03410308 -0.08888637\n",
      "  0.          0.        ]\n",
      "episode 19-step 49, taking action 3, observation [ 0.30730915  0.9561786   0.61622536 -0.7679164   0.02717695 -0.13852258\n",
      "  0.          0.        ]\n",
      "episode 19-step 50, taking action 1, observation [ 0.31330824  0.9383008   0.6045624  -0.794492    0.02258467 -0.09184557\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.          0.        ]\n",
      "episode 19-step 51, taking action 3, observation [ 0.3193783   0.91983306  0.61346275 -0.8207069   0.01620659 -0.12756175\n",
      "  0.          0.        ]\n",
      "episode 19-step 52, taking action 1, observation [ 0.325352    0.9007663   0.60138845 -0.84736973  0.01224618 -0.07920791\n",
      "  0.          0.        ]\n",
      "episode 19-step 53, taking action 1, observation [ 0.33123532  0.8810992   0.59004647 -0.87407964  0.0105572  -0.03377993\n",
      "  0.          0.        ]\n",
      "episode 19-step 54, taking action 2, observation [ 0.33713084  0.86208934  0.5911616  -0.84487414  0.00896485 -0.03184686\n",
      "  0.          0.        ]\n",
      "episode 19-step 55, taking action 1, observation [ 0.3429435   0.8424663   0.58076644 -0.87213546  0.00945618  0.00982652\n",
      "  0.          0.        ]\n",
      "episode 19-step 56, taking action 1, observation [ 0.34866086  0.8222364   0.56881166 -0.8991278   0.01234264  0.05772901\n",
      "  0.          0.        ]\n",
      "episode 19-step 57, taking action 2, observation [ 0.35423392  0.8020326   0.55504    -0.89796823  0.01458613  0.04486995\n",
      "  0.          0.        ]\n",
      "episode 19-step 58, taking action 0, observation [ 0.3598071   0.7812288   0.55504    -0.92463523  0.01682962  0.04486979\n",
      "  0.          0.        ]\n",
      "episode 19-step 59, taking action 1, observation [ 0.36529103  0.7598302   0.5438661  -0.95110434  0.02130921  0.08959163\n",
      "  0.          0.        ]\n",
      "episode 19-step 60, taking action 3, observation [ 0.37084588  0.7378287   0.5527428  -0.9778854   0.02401248  0.05406532\n",
      "  0.          0.        ]\n",
      "episode 19-step 61, taking action 0, observation [ 0.37640065  0.7152274   0.55274284 -1.0045527   0.02671576  0.05406532\n",
      "  0.          0.        ]\n",
      "episode 19-step 62, taking action 0, observation [ 0.38195544  0.6920261   0.55274284 -1.03122     0.02941903  0.05406529\n",
      "  0.          0.        ]\n",
      "episode 19-step 63, taking action 0, observation [ 0.3875102   0.668225    0.55274284 -1.0578873   0.0321223   0.05406534\n",
      "  0.          0.        ]\n",
      "episode 19-step 64, taking action 3, observation [ 0.39313728  0.64382774  0.56182367 -1.0843388   0.03300536  0.01766095\n",
      "  0.          0.        ]\n",
      "episode 19-step 65, taking action 3, observation [ 0.39886075  0.61883444  0.57390314 -1.1107742   0.03146779 -0.03075127\n",
      "  0.          0.        ]\n",
      "episode 19-step 66, taking action 0, observation [ 0.40458423  0.59324133  0.5739031  -1.137441    0.02993024 -0.030751\n",
      "  0.          0.        ]\n",
      "episode 19-step 67, taking action 0, observation [ 0.4103077   0.5670482   0.57390314 -1.1641079   0.02839269 -0.0307511\n",
      "  0.          0.        ]\n",
      "episode 19-step 68, taking action 0, observation [ 0.41603106  0.5402551   0.57390314 -1.1907748   0.02685514 -0.03075098\n",
      "  0.          0.        ]\n",
      "episode 19-step 69, taking action 0, observation [ 0.42175454  0.51286215  0.57390314 -1.2174417   0.02531758 -0.03075101\n",
      "  0.          0.        ]\n",
      "episode 19-step 70, taking action 2, observation [ 0.4276081   0.48579392  0.5862328  -1.2030146   0.02445321 -0.0172873\n",
      "  0.          0.        ]\n",
      "episode 19-step 71, taking action 0, observation [ 0.43346176  0.45812583  0.5862327  -1.2296814   0.02358884 -0.01728726\n",
      "  0.          0.        ]\n",
      "episode 19-step 72, taking action 0, observation [ 0.43931532  0.4298576   0.5862327  -1.2563483   0.02272447 -0.01728733\n",
      "  0.          0.        ]\n",
      "episode 19-step 73, taking action 3, observation [ 0.44522876  0.4009889   0.5937437  -1.2830205   0.0203564  -0.04736133\n",
      "  0.          0.        ]\n",
      "episode 19-step 74, taking action 3, observation [ 0.45122355  0.37153304  0.60395527 -1.3090972   0.01593923 -0.08834337\n",
      "  0.          0.        ]\n",
      "episode 19-step 75, taking action 1, observation [ 0.45714894  0.34147376  0.59524596 -1.3359431   0.01326707 -0.05344315\n",
      "  0.          0.        ]\n",
      "episode 19-step 76, taking action 1, observation [ 0.46301204  0.310804    0.58742833 -1.3630893   0.01216275 -0.02208655\n",
      "  0.          0.        ]\n",
      "episode 19-step 77, taking action 3, observation [ 0.46895647  0.2795334   0.5976324  -1.3897824   0.00901522 -0.06295048\n",
      "  0.          0.        ]\n",
      "episode 19-step 78, taking action 2, observation [ 0.4747406   0.24903925  0.5823163  -1.3552759   0.00516482 -0.07700826\n",
      "  0.          0.        ]\n",
      "episode 19-step 79, taking action 1, observation [ 0.4804488   0.21794079  0.57279366 -1.3821515   0.00322177 -0.03886089\n",
      "  0.          0.        ]\n",
      "episode 19-step 80, taking action 2, observation [ 0.48623124  0.18729909  0.57985705 -1.36185     0.00163203 -0.03179488\n",
      "  0.          0.        ]\n",
      "episode 19-step 81, taking action 0, observation [ 4.9201363e-01  1.5605737e-01  5.7985705e-01 -1.3885169e+00\n",
      "  4.2288171e-05 -3.1794898e-02  0.0000000e+00  0.0000000e+00]\n",
      "episode 19-step 82, taking action 0, observation [ 0.49779615  0.12421583  0.5798571  -1.4151835  -0.00154746 -0.03179484\n",
      "  0.          0.        ]\n",
      "episode 19-step 83, taking action 0, observation [ 0.50357854  0.09177419  0.5798572  -1.4418504  -0.00313719 -0.03179485\n",
      "  1.          0.        ]\n",
      "episode 19-step 84, taking action 1, observation [ 0.508877    0.05993672  0.50582355 -1.4147445   0.01368317  0.3276989\n",
      "  1.          0.        ]\n",
      "episode 19-step 85, taking action 3, observation [ 0.5141412   0.03373655  0.18603179 -0.82134414  0.14637792  5.404044\n",
      "  1.          1.        ]\n",
      "episode 19-step 86, taking action 3, observation [ 5.1613963e-01  2.3905931e-02 -2.5176391e-02 -4.5993403e-03\n",
      "  2.6778135e-01 -1.8403876e-08  1.0000000e+00  1.0000000e+00]\n",
      "episode 19-step 87, taking action 2, observation [ 0.51576006  0.0244953  -0.031106    0.0275174   0.25950435 -0.16947368\n",
      "  0.          1.        ]\n",
      "episode 19-step 88, taking action 1, observation [ 0.51519173  0.02505277 -0.04805528  0.02623293  0.25055078 -0.17987023\n",
      "  0.          1.        ]\n",
      "episode 19-step 89, taking action 3, observation [ 0.5146697   0.02563177 -0.04332472  0.02717244  0.24150217 -0.18153155\n",
      "  0.          1.        ]\n",
      "episode 19-step 90, taking action 1, observation [ 0.51402587  0.02592109 -0.05812718  0.01383645  0.23515034 -0.12744692\n",
      "  0.          1.        ]\n",
      "episode 19-step 91, taking action 3, observation [ 0.5134405   0.02610506 -0.05372617  0.00891137  0.23026076 -0.09809642\n",
      "  0.          1.        ]\n",
      "episode 19-step 92, taking action 0, observation [ 5.1282978e-01  2.6115499e-02 -5.8087002e-02  9.0685033e-04\n",
      "  2.2724353e-01 -6.0582101e-02  0.0000000e+00  1.0000000e+00]\n",
      "episode 19-step 93, taking action 2, observation [ 0.5121767   0.02644673 -0.05967998  0.01558118  0.22153588 -0.11416866\n",
      "  0.          1.        ]\n",
      "episode 19-step 94, taking action 2, observation [ 0.511559    0.02704597 -0.0562749   0.02745211  0.21597181 -0.11129216\n",
      "  0.          1.        ]\n",
      "episode 19-step 95, taking action 3, observation [ 0.5110431   0.02721935 -0.04713554  0.00835148  0.21145792 -0.09029222\n",
      "  0.          1.        ]\n",
      "episode 19-step 96, taking action 2, observation [ 0.5105031   0.02773891 -0.05012188  0.02364143  0.20754372 -0.0782918\n",
      "  0.          1.        ]\n",
      "episode 19-step 97, taking action 3, observation [ 0.51005733  0.02768606 -0.03826941 -0.00147454  0.20118208 -0.1272446\n",
      "  0.          1.        ]\n",
      "episode 19-step 98, taking action 3, observation [ 0.509723    0.0275787  -0.03307498 -0.00471813  0.20082428 -0.00713941\n",
      "  1.          1.        ]\n",
      "episode 19-step 99, taking action 2, observation [ 0.50924176  0.02854758 -0.0479434   0.04308555  0.20066023 -0.00326374\n",
      "  1.          1.        ]\n"
     ]
    }
   ],
   "source": [
    "# Roll out one of the toy agents in LunarLander-v2 and log every transition.\n",
    "env = gym.make('LunarLander-v2')\n",
    "num_episodes = 20  # number of episodes to run\n",
    "num_maxstep = 100  # upper bound on steps taken within one episode\n",
    "\n",
    "# agent_id selects the policy: 1 -> RandomAgent, 2 -> BiasedAgent.\n",
    "agent_id = 1\n",
    "\n",
    "try:\n",
    "    print(env.action_space)\n",
    "    if agent_id == 1:\n",
    "        agent = RandomAgent(env.action_space)\n",
    "    elif agent_id == 2:\n",
    "        agent = BiasedAgent(env.action_space)\n",
    "    else:\n",
    "        # Fail here instead of hitting a NameError on `agent` inside the loop.\n",
    "        raise ValueError('unknown agent_id: {}'.format(agent_id))\n",
    "\n",
    "    for i_episode in range(num_episodes):\n",
    "        observation = env.reset()\n",
    "        # Reset the feedback signals so the first act() call of an episode\n",
    "        # does not receive the previous episode's final reward/done.\n",
    "        reward = 0\n",
    "        done = False\n",
    "        for t in range(num_maxstep):\n",
    "            env.render()\n",
    "            action = agent.act(observation, reward, done)\n",
    "            observation, reward, done, info = env.step(action)\n",
    "            print('episode {}-step {}, taking action {}, observation {}'.format(i_episode, t, action, observation))\n",
    "            if done:\n",
    "                # The episode has terminated; stepping again without reset() is\n",
    "                # undefined in gym, so move on to the next episode.\n",
    "                print('Episode finished after {} timesteps'.format(t + 1))\n",
    "                break\n",
    "finally:\n",
    "    # Always release the environment (and its render window), even on error.\n",
    "    env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['adventure', 'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis', 'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival', 'centipede', 'chopper_command', 'crazy_climber', 'defender', 'demon_attack', 'double_dunk', 'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar', 'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kaboom', 'kangaroo', 'krull', 'kung_fu_master', 'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan', 'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing', 'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down', 'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "import atari_py\n",
    "# Show the game identifiers reported by atari_py.list_games().\n",
    "game_names = atari_py.list_games()\n",
    "print(game_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch",
   "language": "python",
   "name": "torch"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
